sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder

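# Usage sketch (illustrative): these builders receive the argument list collected by the
# parser, plus the dialect when they accept one. For instance, build_var_map pairs up
# alternating keys and values, and build_logarithm keeps or swaps LOG's operands
# depending on dialect.LOG_BASE_FIRST:
#
#     from sqlglot.dialects.dialect import Dialect
#
#     node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     assert isinstance(node, exp.VarMap)
#
#     log = build_logarithm([exp.Literal.number(2), exp.column("x")], Dialect())
#     assert isinstance(log, exp.Log)
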

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
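    # Usage sketch (illustrative): a Parser is normally obtained through a Dialect, which
    # supplies a matching Tokenizer and Parser subclass:
    #
    #     from sqlglot.dialects.dialect import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT a FROM t; SELECT b FROM u"
    #     trees = dialect.parser(error_level=ErrorLevel.RAISE).parse(dialect.tokenize(sql), sql)
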
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
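    # Note (illustrative): dialects customize the parsing of specific function names by
    # overriding entries in this mapping; each builder is called with the parsed argument
    # list, and builders that also take a dialect parameter (as several above do) receive
    # the active dialect as well.
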
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
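    # Note (illustrative): TABLE_ALIAS_TOKENS drops join-related keywords so that, for
    # example, in `SELECT * FROM t LEFT JOIN u` the LEFT token starts a join instead of
    # being read as an alias of `t`; plain expression aliases (ALIAS_TOKENS) keep the
    # full identifier set.
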
    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
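    # For example (illustrative), in `TRANSFORM(xs, x -> x + 1)` the ARROW entry builds
    # exp.Lambda(this=<x + 1>, expressions=[<x>]), while FARROW (`=>`) turns
    # `foo(precision => 3)` into an exp.Kwarg named "precision".
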
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
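    # For example (illustrative), the Hive-style `bucket(16, id)` (literal first) and the
    # Trino-style `bucket(id, 16)` both end up as
    # exp.PartitionedByBucket(this=<id>, expression=<16>): when the first parsed argument
    # is a literal, the two arguments are swapped above.
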
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
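    # Note (illustrative): these cover functions whose argument lists aren't plain
    # comma-separated expressions and therefore need dedicated parsing, e.g.
    # CAST(x AS INT), EXTRACT(YEAR FROM d), TRIM(BOTH 'x' FROM y) or
    # SUBSTRING(s FROM 1 FOR 2).
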
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
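    # Usage sketch (illustrative): given tokens for "SELECT x FROM t WHERE y = 1",
    # parse_into(exp.Select, tokens, sql) returns a list with one exp.Select tree, whereas
    # parse(tokens, sql) parses the same tokens as a generic list of statements.
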
1620 """ 1621 errors = [] 1622 for expression_type in ensure_list(expression_types): 1623 parser = self.EXPRESSION_PARSERS.get(expression_type) 1624 if not parser: 1625 raise TypeError(f"No parser registered for {expression_type}") 1626 1627 try: 1628 return self._parse(parser, raw_tokens, sql) 1629 except ParseError as e: 1630 e.errors[0]["into_expression"] = expression_type 1631 errors.append(e) 1632 1633 raise ParseError( 1634 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1635 errors=merge_errors(errors), 1636 ) from errors[-1] 1637 1638 def _parse( 1639 self, 1640 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1641 raw_tokens: t.List[Token], 1642 sql: t.Optional[str] = None, 1643 ) -> t.List[t.Optional[exp.Expression]]: 1644 self.reset() 1645 self.sql = sql or "" 1646 1647 total = len(raw_tokens) 1648 chunks: t.List[t.List[Token]] = [[]] 1649 1650 for i, token in enumerate(raw_tokens): 1651 if token.token_type == TokenType.SEMICOLON: 1652 if token.comments: 1653 chunks.append([token]) 1654 1655 if i < total - 1: 1656 chunks.append([]) 1657 else: 1658 chunks[-1].append(token) 1659 1660 expressions = [] 1661 1662 for tokens in chunks: 1663 self._index = -1 1664 self._tokens = tokens 1665 self._advance() 1666 1667 expressions.append(parse_method(self)) 1668 1669 if self._index < len(self._tokens): 1670 self.raise_error("Invalid expression / Unexpected token") 1671 1672 self.check_errors() 1673 1674 return expressions 1675 1676 def check_errors(self) -> None: 1677 """Logs or raises any found errors, depending on the chosen error level setting.""" 1678 if self.error_level == ErrorLevel.WARN: 1679 for error in self.errors: 1680 logger.error(str(error)) 1681 elif self.error_level == ErrorLevel.RAISE and self.errors: 1682 raise ParseError( 1683 concat_messages(self.errors, self.max_errors), 1684 errors=merge_errors(self.errors), 1685 ) 1686 1687 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1688 """ 1689 Appends an error in the list of recorded errors or raises it, depending on the chosen 1690 error level setting. 1691 """ 1692 token = token or self._curr or self._prev or Token.string("") 1693 start = token.start 1694 end = token.end + 1 1695 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1696 highlight = self.sql[start:end] 1697 end_context = self.sql[end : end + self.error_message_context] 1698 1699 error = ParseError.new( 1700 f"{message}. Line {token.line}, Col: {token.col}.\n" 1701 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1702 description=message, 1703 line=token.line, 1704 col=token.col, 1705 start_context=start_context, 1706 highlight=highlight, 1707 end_context=end_context, 1708 ) 1709 1710 if self.error_level == ErrorLevel.IMMEDIATE: 1711 raise error 1712 1713 self.errors.append(error) 1714 1715 def expression( 1716 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1717 ) -> E: 1718 """ 1719 Creates a new, validated Expression. 1720 1721 Args: 1722 exp_class: The expression class to instantiate. 1723 comments: An optional list of comments to attach to the expression. 1724 kwargs: The arguments to set for the expression along with their respective values. 1725 1726 Returns: 1727 The target expression. 
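Example (a minimal sketch, assuming it runs inside a parser method where `expressions` already holds a list of parsed expressions):

    # Builds and validates a Tuple node; pending comments are attached
    # automatically when no explicit comments are passed.
    node = self.expression(exp.Tuple, expressions=expressions)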
1728 """ 1729 instance = exp_class(**kwargs) 1730 instance.add_comments(comments) if comments else self._add_comments(instance) 1731 return self.validate_expression(instance) 1732 1733 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1734 if expression and self._prev_comments: 1735 expression.add_comments(self._prev_comments) 1736 self._prev_comments = None 1737
1738 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1739 """ 1740 Validates an Expression, making sure that all its mandatory arguments are set. 1741 1742 Args: 1743 expression: The expression to validate. 1744 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1745 1746 Returns: 1747 The validated expression. 1748 """ 1749 if self.error_level != ErrorLevel.IGNORE: 1750 for error_message in expression.error_messages(args): 1751 self.raise_error(error_message) 1752 1753 return expression 1754
1755 def _find_sql(self, start: Token, end: Token) -> str: 1756 return self.sql[start.start : end.end + 1] 1757 1758 def _is_connected(self) -> bool: 1759 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1760
1761 def _advance(self, times: int = 1) -> None: 1762 self._index += times 1763 self._curr = seq_get(self._tokens, self._index) 1764 self._next = seq_get(self._tokens, self._index + 1) 1765 1766 if self._index > 0: 1767 self._prev = self._tokens[self._index - 1] 1768 self._prev_comments = self._prev.comments 1769 else: 1770 self._prev = None 1771 self._prev_comments = None 1772
1773 def _retreat(self, index: int) -> None: 1774 if index != self._index: 1775 self._advance(index - self._index) 1776
1777 def _warn_unsupported(self) -> None: 1778 if len(self._tokens) <= 1: 1779 return 1780 1781 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1782 # interested in emitting a warning for the one being currently processed. 1783 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1784 1785 logger.warning( 1786 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1787 ) 1788
1789 def _parse_command(self) -> exp.Command: 1790 self._warn_unsupported() 1791 return self.expression( 1792 exp.Command, 1793 comments=self._prev_comments, 1794 this=self._prev.text.upper(), 1795 expression=self._parse_string(), 1796 ) 1797
1798 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1799 """ 1800 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1801 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1802 solve this by setting & resetting the parser state accordingly 1803 """ 1804 index = self._index 1805 error_level = self.error_level 1806 1807 self.error_level = ErrorLevel.IMMEDIATE 1808 try: 1809 this = parse_method() 1810 except ParseError: 1811 this = None 1812 finally: 1813 if not this or retreat: 1814 self._retreat(index) 1815 self.error_level = error_level 1816 1817 return this 1818
1819 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1820 start = self._prev 1821 exists = self._parse_exists() if allow_exists else None 1822 1823 self._match(TokenType.ON) 1824 1825 materialized = self._match_text_seq("MATERIALIZED") 1826 kind = self._match_set(self.CREATABLES) and self._prev 1827 if not kind: 1828 return self._parse_as_command(start) 1829 1830 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1831 this = self._parse_user_defined_function(kind=kind.token_type) 1832 elif kind.token_type == TokenType.TABLE: 1833 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1834 elif kind.token_type == TokenType.COLUMN: 1835 this = self._parse_column() 1836 else: 1837 this = self._parse_id_var() 1838 1839 self._match(TokenType.IS) 1840 1841 return self.expression( 1842 exp.Comment, 1843 this=this, 1844 kind=kind.text, 1845 expression=self._parse_string(), 1846 exists=exists, 1847 materialized=materialized, 1848 ) 1849
1850 def _parse_to_table( 1851 self, 1852 ) -> exp.ToTableProperty: 1853 table = self._parse_table_parts(schema=True) 1854 return self.expression(exp.ToTableProperty, this=table) 1855
1856 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1857 def _parse_ttl(self) -> exp.Expression: 1858 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1859 this = self._parse_bitwise() 1860 1861 if self._match_text_seq("DELETE"): 1862 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1863 if self._match_text_seq("RECOMPRESS"): 1864 return self.expression( 1865 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1866 ) 1867 if self._match_text_seq("TO", "DISK"): 1868 return self.expression( 1869 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1870 ) 1871 if self._match_text_seq("TO", "VOLUME"): 1872 return self.expression( 1873 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1874 ) 1875 1876 return this 1877 1878 expressions = self._parse_csv(_parse_ttl_action) 1879 where = self._parse_where() 1880 group = self._parse_group() 1881 1882 aggregates = None 1883 if group and self._match(TokenType.SET): 1884 aggregates = self._parse_csv(self._parse_set_item) 1885 1886 return self.expression( 1887 exp.MergeTreeTTL, 1888 expressions=expressions, 1889 where=where, 1890 group=group, 1891 aggregates=aggregates, 1892 ) 1893
1894 def _parse_statement(self) -> t.Optional[exp.Expression]: 1895 if self._curr is None: 1896 return None 1897 1898 if self._match_set(self.STATEMENT_PARSERS): 1899 comments = self._prev_comments 1900 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1901 stmt.add_comments(comments, prepend=True) 1902 return stmt 1903 1904 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1905 return self._parse_command() 1906 1907 expression = self._parse_expression() 1908 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1909 return
self._parse_query_modifiers(expression) 1910 1911 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1912 start = self._prev 1913 temporary = self._match(TokenType.TEMPORARY) 1914 materialized = self._match_text_seq("MATERIALIZED") 1915 1916 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1917 if not kind: 1918 return self._parse_as_command(start) 1919 1920 concurrently = self._match_text_seq("CONCURRENTLY") 1921 if_exists = exists or self._parse_exists() 1922 1923 if kind == "COLUMN": 1924 this = self._parse_column() 1925 else: 1926 this = self._parse_table_parts( 1927 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1928 ) 1929 1930 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1931 1932 if self._match(TokenType.L_PAREN, advance=False): 1933 expressions = self._parse_wrapped_csv(self._parse_types) 1934 else: 1935 expressions = None 1936 1937 return self.expression( 1938 exp.Drop, 1939 exists=if_exists, 1940 this=this, 1941 expressions=expressions, 1942 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1943 temporary=temporary, 1944 materialized=materialized, 1945 cascade=self._match_text_seq("CASCADE"), 1946 constraints=self._match_text_seq("CONSTRAINTS"), 1947 purge=self._match_text_seq("PURGE"), 1948 cluster=cluster, 1949 concurrently=concurrently, 1950 ) 1951 1952 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1953 return ( 1954 self._match_text_seq("IF") 1955 and (not not_ or self._match(TokenType.NOT)) 1956 and self._match(TokenType.EXISTS) 1957 ) 1958 1959 def _parse_create(self) -> exp.Create | exp.Command: 1960 # Note: this can't be None because we've matched a statement parser 1961 start = self._prev 1962 1963 replace = ( 1964 start.token_type == TokenType.REPLACE 1965 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1966 or self._match_pair(TokenType.OR, TokenType.ALTER) 1967 ) 1968 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1969 1970 unique = self._match(TokenType.UNIQUE) 1971 1972 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1973 clustered = True 1974 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1975 "COLUMNSTORE" 1976 ): 1977 clustered = False 1978 else: 1979 clustered = None 1980 1981 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1982 self._advance() 1983 1984 properties = None 1985 create_token = self._match_set(self.CREATABLES) and self._prev 1986 1987 if not create_token: 1988 # exp.Properties.Location.POST_CREATE 1989 properties = self._parse_properties() 1990 create_token = self._match_set(self.CREATABLES) and self._prev 1991 1992 if not properties or not create_token: 1993 return self._parse_as_command(start) 1994 1995 concurrently = self._match_text_seq("CONCURRENTLY") 1996 exists = self._parse_exists(not_=True) 1997 this = None 1998 expression: t.Optional[exp.Expression] = None 1999 indexes = None 2000 no_schema_binding = None 2001 begin = None 2002 end = None 2003 clone = None 2004 2005 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2006 nonlocal properties 2007 if properties and temp_props: 2008 properties.expressions.extend(temp_props.expressions) 2009 elif temp_props: 2010 properties = temp_props 2011 2012 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2013 this = self._parse_user_defined_function(kind=create_token.token_type) 2014 2015 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2016 extend_props(self._parse_properties()) 2017 2018 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2019 extend_props(self._parse_properties()) 2020 2021 if not expression: 2022 if self._match(TokenType.COMMAND): 2023 expression = self._parse_as_command(self._prev) 2024 else: 2025 begin = self._match(TokenType.BEGIN) 2026 return_ = self._match_text_seq("RETURN") 2027 2028 if self._match(TokenType.STRING, advance=False): 2029 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2030 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2031 expression = self._parse_string() 2032 extend_props(self._parse_properties()) 2033 else: 2034 expression = self._parse_user_defined_function_expression() 2035 2036 end = self._match_text_seq("END") 2037 2038 if return_: 2039 expression = self.expression(exp.Return, this=expression) 2040 elif create_token.token_type == TokenType.INDEX: 2041 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2042 if not self._match(TokenType.ON): 2043 index = self._parse_id_var() 2044 anonymous = False 2045 else: 2046 index = None 2047 anonymous = True 2048 2049 this = self._parse_index(index=index, anonymous=anonymous) 2050 elif create_token.token_type in self.DB_CREATABLES: 2051 table_parts = self._parse_table_parts( 2052 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2053 ) 2054 2055 # exp.Properties.Location.POST_NAME 2056 self._match(TokenType.COMMA) 2057 extend_props(self._parse_properties(before=True)) 2058 2059 this = self._parse_schema(this=table_parts) 2060 2061 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2062 extend_props(self._parse_properties()) 2063 2064 has_alias = self._match(TokenType.ALIAS) 2065 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2066 # exp.Properties.Location.POST_ALIAS 2067 extend_props(self._parse_properties()) 2068 2069 if create_token.token_type == TokenType.SEQUENCE: 2070 expression = self._parse_types() 2071 extend_props(self._parse_properties()) 2072 else: 2073 expression = self._parse_ddl_select() 2074 2075 # Some dialects also support using a table as an alias instead of a SELECT. 2076 # Here we fallback to this as an alternative. 
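# (illustrative: e.g. CREATE TABLE t1 AS t2, where t2 is then parsed as a plain table reference; dialect support varies)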
2077 if not expression and has_alias: 2078 expression = self._try_parse(self._parse_table_parts) 2079 2080 if create_token.token_type == TokenType.TABLE: 2081 # exp.Properties.Location.POST_EXPRESSION 2082 extend_props(self._parse_properties()) 2083 2084 indexes = [] 2085 while True: 2086 index = self._parse_index() 2087 2088 # exp.Properties.Location.POST_INDEX 2089 extend_props(self._parse_properties()) 2090 if not index: 2091 break 2092 else: 2093 self._match(TokenType.COMMA) 2094 indexes.append(index) 2095 elif create_token.token_type == TokenType.VIEW: 2096 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2097 no_schema_binding = True 2098 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2099 extend_props(self._parse_properties()) 2100 2101 shallow = self._match_text_seq("SHALLOW") 2102 2103 if self._match_texts(self.CLONE_KEYWORDS): 2104 copy = self._prev.text.lower() == "copy" 2105 clone = self.expression( 2106 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2107 ) 2108 2109 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2110 return self._parse_as_command(start) 2111 2112 create_kind_text = create_token.text.upper() 2113 return self.expression( 2114 exp.Create, 2115 this=this, 2116 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2117 replace=replace, 2118 refresh=refresh, 2119 unique=unique, 2120 expression=expression, 2121 exists=exists, 2122 properties=properties, 2123 indexes=indexes, 2124 no_schema_binding=no_schema_binding, 2125 begin=begin, 2126 end=end, 2127 clone=clone, 2128 concurrently=concurrently, 2129 clustered=clustered, 2130 ) 2131 2132 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2133 seq = exp.SequenceProperties() 2134 2135 options = [] 2136 index = self._index 2137 2138 while self._curr: 2139 self._match(TokenType.COMMA) 2140 if self._match_text_seq("INCREMENT"): 2141 self._match_text_seq("BY") 2142 self._match_text_seq("=") 2143 seq.set("increment", self._parse_term()) 2144 elif self._match_text_seq("MINVALUE"): 2145 seq.set("minvalue", self._parse_term()) 2146 elif self._match_text_seq("MAXVALUE"): 2147 seq.set("maxvalue", self._parse_term()) 2148 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2149 self._match_text_seq("=") 2150 seq.set("start", self._parse_term()) 2151 elif self._match_text_seq("CACHE"): 2152 # T-SQL allows empty CACHE which is initialized dynamically 2153 seq.set("cache", self._parse_number() or True) 2154 elif self._match_text_seq("OWNED", "BY"): 2155 # "OWNED BY NONE" is the default 2156 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2157 else: 2158 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2159 if opt: 2160 options.append(opt) 2161 else: 2162 break 2163 2164 seq.set("options", options if options else None) 2165 return None if self._index == index else seq 2166 2167 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2168 # only used for teradata currently 2169 self._match(TokenType.COMMA) 2170 2171 kwargs = { 2172 "no": self._match_text_seq("NO"), 2173 "dual": self._match_text_seq("DUAL"), 2174 "before": self._match_text_seq("BEFORE"), 2175 "default": self._match_text_seq("DEFAULT"), 2176 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2177 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2178 "after": self._match_text_seq("AFTER"), 2179 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2180 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2181 } 2182 2183 if self._match_texts(self.PROPERTY_PARSERS): 2184 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2185 try: 2186 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2187 except TypeError: 2188 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2189 2190 return None 2191 2192 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2193 return self._parse_wrapped_csv(self._parse_property) 2194 2195 def _parse_property(self) -> t.Optional[exp.Expression]: 2196 if self._match_texts(self.PROPERTY_PARSERS): 2197 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2198 2199 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2200 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2201 2202 if self._match_text_seq("COMPOUND", "SORTKEY"): 2203 return self._parse_sortkey(compound=True) 2204 2205 if self._match_text_seq("SQL", "SECURITY"): 2206 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2207 2208 index = self._index 2209 key = self._parse_column() 2210 2211 if not self._match(TokenType.EQ): 2212 self._retreat(index) 2213 return self._parse_sequence_properties() 2214 2215 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2216 if isinstance(key, exp.Column): 2217 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2218 2219 value = self._parse_bitwise() or self._parse_var(any_token=True) 2220 2221 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2222 if isinstance(value, exp.Column): 2223 value = exp.var(value.name) 2224 2225 return self.expression(exp.Property, this=key, value=value) 2226 2227 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2228 if self._match_text_seq("BY"): 2229 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2230 2231 self._match(TokenType.ALIAS) 2232 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2233 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2234 2235 return self.expression( 2236 exp.FileFormatProperty, 2237 this=( 2238 self.expression( 2239 exp.InputOutputFormat, 2240 input_format=input_format, 2241 output_format=output_format, 2242 ) 2243 if input_format or output_format 2244 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2245 ), 2246 hive_format=True, 2247 ) 2248 2249 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2250 field = self._parse_field() 2251 if isinstance(field, exp.Identifier) and not field.quoted: 2252 field = exp.var(field) 2253 2254 return field 2255 2256 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2257 self._match(TokenType.EQ) 2258 self._match(TokenType.ALIAS) 2259 2260 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2261 2262 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2263 properties = [] 2264 while True: 2265 if before: 2266 prop = self._parse_property_before() 2267 else: 2268 prop = self._parse_property() 2269 if not prop: 2270 break 2271 for p in ensure_list(prop): 2272 properties.append(p) 2273 2274 if properties: 2275 return self.expression(exp.Properties, expressions=properties) 2276 
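# No property could be parsed at this position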
2277 return None 2278 2279 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2280 return self.expression( 2281 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2282 ) 2283 2284 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2285 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2286 security_specifier = self._prev.text.upper() 2287 return self.expression(exp.SecurityProperty, this=security_specifier) 2288 return None 2289 2290 def _parse_settings_property(self) -> exp.SettingsProperty: 2291 return self.expression( 2292 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2293 ) 2294 2295 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2296 if self._index >= 2: 2297 pre_volatile_token = self._tokens[self._index - 2] 2298 else: 2299 pre_volatile_token = None 2300 2301 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2302 return exp.VolatileProperty() 2303 2304 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2305 2306 def _parse_retention_period(self) -> exp.Var: 2307 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2308 number = self._parse_number() 2309 number_str = f"{number} " if number else "" 2310 unit = self._parse_var(any_token=True) 2311 return exp.var(f"{number_str}{unit}") 2312 2313 def _parse_system_versioning_property( 2314 self, with_: bool = False 2315 ) -> exp.WithSystemVersioningProperty: 2316 self._match(TokenType.EQ) 2317 prop = self.expression( 2318 exp.WithSystemVersioningProperty, 2319 **{ # type: ignore 2320 "on": True, 2321 "with": with_, 2322 }, 2323 ) 2324 2325 if self._match_text_seq("OFF"): 2326 prop.set("on", False) 2327 return prop 2328 2329 self._match(TokenType.ON) 2330 if self._match(TokenType.L_PAREN): 2331 while self._curr and not self._match(TokenType.R_PAREN): 2332 if self._match_text_seq("HISTORY_TABLE", "="): 2333 prop.set("this", self._parse_table_parts()) 2334 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2335 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2336 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2337 prop.set("retention_period", self._parse_retention_period()) 2338 2339 self._match(TokenType.COMMA) 2340 2341 return prop 2342 2343 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2344 self._match(TokenType.EQ) 2345 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2346 prop = self.expression(exp.DataDeletionProperty, on=on) 2347 2348 if self._match(TokenType.L_PAREN): 2349 while self._curr and not self._match(TokenType.R_PAREN): 2350 if self._match_text_seq("FILTER_COLUMN", "="): 2351 prop.set("filter_column", self._parse_column()) 2352 elif self._match_text_seq("RETENTION_PERIOD", "="): 2353 prop.set("retention_period", self._parse_retention_period()) 2354 2355 self._match(TokenType.COMMA) 2356 2357 return prop 2358 2359 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2360 kind = "HASH" 2361 expressions: t.Optional[t.List[exp.Expression]] = None 2362 if self._match_text_seq("BY", "HASH"): 2363 expressions = self._parse_wrapped_csv(self._parse_id_var) 2364 elif self._match_text_seq("BY", "RANDOM"): 2365 kind = "RANDOM" 2366 2367 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2368 buckets: t.Optional[exp.Expression] = None 2369 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2370 buckets = self._parse_number() 2371 2372 return self.expression( 2373 exp.DistributedByProperty, 2374 expressions=expressions, 2375 kind=kind, 2376 buckets=buckets, 2377 order=self._parse_order(), 2378 ) 2379 2380 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2381 self._match_text_seq("KEY") 2382 expressions = self._parse_wrapped_id_vars() 2383 return self.expression(expr_type, expressions=expressions) 2384 2385 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2386 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2387 prop = self._parse_system_versioning_property(with_=True) 2388 self._match_r_paren() 2389 return prop 2390 2391 if self._match(TokenType.L_PAREN, advance=False): 2392 return self._parse_wrapped_properties() 2393 2394 if self._match_text_seq("JOURNAL"): 2395 return self._parse_withjournaltable() 2396 2397 if self._match_texts(self.VIEW_ATTRIBUTES): 2398 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2399 2400 if self._match_text_seq("DATA"): 2401 return self._parse_withdata(no=False) 2402 elif self._match_text_seq("NO", "DATA"): 2403 return self._parse_withdata(no=True) 2404 2405 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2406 return self._parse_serde_properties(with_=True) 2407 2408 if self._match(TokenType.SCHEMA): 2409 return self.expression( 2410 exp.WithSchemaBindingProperty, 2411 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2412 ) 2413 2414 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2415 return self.expression( 2416 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2417 ) 2418 2419 if not self._next: 2420 return None 2421 2422 return self._parse_withisolatedloading() 2423 2424 def _parse_procedure_option(self) -> exp.Expression | None: 2425 if self._match_text_seq("EXECUTE", "AS"): 2426 return self.expression( 2427 exp.ExecuteAsProperty, 2428 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2429 or self._parse_string(), 2430 ) 2431 2432 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2433 2434 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2435 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2436 self._match(TokenType.EQ) 2437 2438 user = self._parse_id_var() 2439 self._match(TokenType.PARAMETER) 2440 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2441 2442 if not user or not host: 2443 return None 2444 2445 return exp.DefinerProperty(this=f"{user}@{host}") 2446 2447 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2448 self._match(TokenType.TABLE) 2449 self._match(TokenType.EQ) 2450 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2451 2452 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2453 return self.expression(exp.LogProperty, no=no) 2454 2455 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2456 return self.expression(exp.JournalProperty, **kwargs) 2457 2458 def _parse_checksum(self) -> exp.ChecksumProperty: 2459 self._match(TokenType.EQ) 2460 2461 on = None 2462 if self._match(TokenType.ON): 2463 on = True 2464 elif self._match_text_seq("OFF"): 2465 on = False 2466 2467 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2468 2469 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2470 return self.expression( 2471 
exp.Cluster, 2472 expressions=( 2473 self._parse_wrapped_csv(self._parse_ordered) 2474 if wrapped 2475 else self._parse_csv(self._parse_ordered) 2476 ), 2477 ) 2478 2479 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2480 self._match_text_seq("BY") 2481 2482 self._match_l_paren() 2483 expressions = self._parse_csv(self._parse_column) 2484 self._match_r_paren() 2485 2486 if self._match_text_seq("SORTED", "BY"): 2487 self._match_l_paren() 2488 sorted_by = self._parse_csv(self._parse_ordered) 2489 self._match_r_paren() 2490 else: 2491 sorted_by = None 2492 2493 self._match(TokenType.INTO) 2494 buckets = self._parse_number() 2495 self._match_text_seq("BUCKETS") 2496 2497 return self.expression( 2498 exp.ClusteredByProperty, 2499 expressions=expressions, 2500 sorted_by=sorted_by, 2501 buckets=buckets, 2502 ) 2503 2504 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2505 if not self._match_text_seq("GRANTS"): 2506 self._retreat(self._index - 1) 2507 return None 2508 2509 return self.expression(exp.CopyGrantsProperty) 2510 2511 def _parse_freespace(self) -> exp.FreespaceProperty: 2512 self._match(TokenType.EQ) 2513 return self.expression( 2514 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2515 ) 2516 2517 def _parse_mergeblockratio( 2518 self, no: bool = False, default: bool = False 2519 ) -> exp.MergeBlockRatioProperty: 2520 if self._match(TokenType.EQ): 2521 return self.expression( 2522 exp.MergeBlockRatioProperty, 2523 this=self._parse_number(), 2524 percent=self._match(TokenType.PERCENT), 2525 ) 2526 2527 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2528 2529 def _parse_datablocksize( 2530 self, 2531 default: t.Optional[bool] = None, 2532 minimum: t.Optional[bool] = None, 2533 maximum: t.Optional[bool] = None, 2534 ) -> exp.DataBlocksizeProperty: 2535 self._match(TokenType.EQ) 2536 size = self._parse_number() 2537 2538 units = None 2539 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2540 units = self._prev.text 2541 2542 return self.expression( 2543 exp.DataBlocksizeProperty, 2544 size=size, 2545 units=units, 2546 default=default, 2547 minimum=minimum, 2548 maximum=maximum, 2549 ) 2550 2551 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2552 self._match(TokenType.EQ) 2553 always = self._match_text_seq("ALWAYS") 2554 manual = self._match_text_seq("MANUAL") 2555 never = self._match_text_seq("NEVER") 2556 default = self._match_text_seq("DEFAULT") 2557 2558 autotemp = None 2559 if self._match_text_seq("AUTOTEMP"): 2560 autotemp = self._parse_schema() 2561 2562 return self.expression( 2563 exp.BlockCompressionProperty, 2564 always=always, 2565 manual=manual, 2566 never=never, 2567 default=default, 2568 autotemp=autotemp, 2569 ) 2570 2571 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2572 index = self._index 2573 no = self._match_text_seq("NO") 2574 concurrent = self._match_text_seq("CONCURRENT") 2575 2576 if not self._match_text_seq("ISOLATED", "LOADING"): 2577 self._retreat(index) 2578 return None 2579 2580 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2581 return self.expression( 2582 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2583 ) 2584 2585 def _parse_locking(self) -> exp.LockingProperty: 2586 if self._match(TokenType.TABLE): 2587 kind = "TABLE" 2588 elif self._match(TokenType.VIEW): 2589 kind = "VIEW" 2590 elif self._match(TokenType.ROW): 2591 kind 
= "ROW" 2592 elif self._match_text_seq("DATABASE"): 2593 kind = "DATABASE" 2594 else: 2595 kind = None 2596 2597 if kind in ("DATABASE", "TABLE", "VIEW"): 2598 this = self._parse_table_parts() 2599 else: 2600 this = None 2601 2602 if self._match(TokenType.FOR): 2603 for_or_in = "FOR" 2604 elif self._match(TokenType.IN): 2605 for_or_in = "IN" 2606 else: 2607 for_or_in = None 2608 2609 if self._match_text_seq("ACCESS"): 2610 lock_type = "ACCESS" 2611 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2612 lock_type = "EXCLUSIVE" 2613 elif self._match_text_seq("SHARE"): 2614 lock_type = "SHARE" 2615 elif self._match_text_seq("READ"): 2616 lock_type = "READ" 2617 elif self._match_text_seq("WRITE"): 2618 lock_type = "WRITE" 2619 elif self._match_text_seq("CHECKSUM"): 2620 lock_type = "CHECKSUM" 2621 else: 2622 lock_type = None 2623 2624 override = self._match_text_seq("OVERRIDE") 2625 2626 return self.expression( 2627 exp.LockingProperty, 2628 this=this, 2629 kind=kind, 2630 for_or_in=for_or_in, 2631 lock_type=lock_type, 2632 override=override, 2633 ) 2634 2635 def _parse_partition_by(self) -> t.List[exp.Expression]: 2636 if self._match(TokenType.PARTITION_BY): 2637 return self._parse_csv(self._parse_assignment) 2638 return [] 2639 2640 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2641 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2642 if self._match_text_seq("MINVALUE"): 2643 return exp.var("MINVALUE") 2644 if self._match_text_seq("MAXVALUE"): 2645 return exp.var("MAXVALUE") 2646 return self._parse_bitwise() 2647 2648 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2649 expression = None 2650 from_expressions = None 2651 to_expressions = None 2652 2653 if self._match(TokenType.IN): 2654 this = self._parse_wrapped_csv(self._parse_bitwise) 2655 elif self._match(TokenType.FROM): 2656 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2657 self._match_text_seq("TO") 2658 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2659 elif self._match_text_seq("WITH", "(", "MODULUS"): 2660 this = self._parse_number() 2661 self._match_text_seq(",", "REMAINDER") 2662 expression = self._parse_number() 2663 self._match_r_paren() 2664 else: 2665 self.raise_error("Failed to parse partition bound spec.") 2666 2667 return self.expression( 2668 exp.PartitionBoundSpec, 2669 this=this, 2670 expression=expression, 2671 from_expressions=from_expressions, 2672 to_expressions=to_expressions, 2673 ) 2674 2675 # https://www.postgresql.org/docs/current/sql-createtable.html 2676 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2677 if not self._match_text_seq("OF"): 2678 self._retreat(self._index - 1) 2679 return None 2680 2681 this = self._parse_table(schema=True) 2682 2683 if self._match(TokenType.DEFAULT): 2684 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2685 elif self._match_text_seq("FOR", "VALUES"): 2686 expression = self._parse_partition_bound_spec() 2687 else: 2688 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2689 2690 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2691 2692 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2693 self._match(TokenType.EQ) 2694 return self.expression( 2695 exp.PartitionedByProperty, 2696 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2697 ) 2698 2699 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2700 if self._match_text_seq("AND", 
"STATISTICS"): 2701 statistics = True 2702 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2703 statistics = False 2704 else: 2705 statistics = None 2706 2707 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2708 2709 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2710 if self._match_text_seq("SQL"): 2711 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2712 return None 2713 2714 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2715 if self._match_text_seq("SQL", "DATA"): 2716 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2717 return None 2718 2719 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2720 if self._match_text_seq("PRIMARY", "INDEX"): 2721 return exp.NoPrimaryIndexProperty() 2722 if self._match_text_seq("SQL"): 2723 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2724 return None 2725 2726 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2727 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2728 return exp.OnCommitProperty() 2729 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2730 return exp.OnCommitProperty(delete=True) 2731 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2732 2733 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2734 if self._match_text_seq("SQL", "DATA"): 2735 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2736 return None 2737 2738 def _parse_distkey(self) -> exp.DistKeyProperty: 2739 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2740 2741 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2742 table = self._parse_table(schema=True) 2743 2744 options = [] 2745 while self._match_texts(("INCLUDING", "EXCLUDING")): 2746 this = self._prev.text.upper() 2747 2748 id_var = self._parse_id_var() 2749 if not id_var: 2750 return None 2751 2752 options.append( 2753 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2754 ) 2755 2756 return self.expression(exp.LikeProperty, this=table, expressions=options) 2757 2758 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2759 return self.expression( 2760 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2761 ) 2762 2763 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2764 self._match(TokenType.EQ) 2765 return self.expression( 2766 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2767 ) 2768 2769 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2770 self._match_text_seq("WITH", "CONNECTION") 2771 return self.expression( 2772 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2773 ) 2774 2775 def _parse_returns(self) -> exp.ReturnsProperty: 2776 value: t.Optional[exp.Expression] 2777 null = None 2778 is_table = self._match(TokenType.TABLE) 2779 2780 if is_table: 2781 if self._match(TokenType.LT): 2782 value = self.expression( 2783 exp.Schema, 2784 this="TABLE", 2785 expressions=self._parse_csv(self._parse_struct_types), 2786 ) 2787 if not self._match(TokenType.GT): 2788 self.raise_error("Expecting >") 2789 else: 2790 value = self._parse_schema(exp.var("TABLE")) 2791 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2792 null = True 2793 value = None 2794 else: 2795 value = self._parse_types() 2796 2797 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2798 2799 def _parse_describe(self) -> exp.Describe: 2800 kind = self._match_set(self.CREATABLES) and self._prev.text 2801 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2802 if self._match(TokenType.DOT): 2803 style = None 2804 self._retreat(self._index - 2) 2805 2806 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2807 2808 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2809 this = self._parse_statement() 2810 else: 2811 this = self._parse_table(schema=True) 2812 2813 properties = self._parse_properties() 2814 expressions = properties.expressions if properties else None 2815 partition = self._parse_partition() 2816 return self.expression( 2817 exp.Describe, 2818 this=this, 2819 style=style, 2820 kind=kind, 2821 expressions=expressions, 2822 partition=partition, 2823 format=format, 2824 ) 2825 2826 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2827 kind = self._prev.text.upper() 2828 expressions = [] 2829 2830 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2831 if self._match(TokenType.WHEN): 2832 expression = self._parse_disjunction() 2833 self._match(TokenType.THEN) 2834 else: 2835 expression = None 2836 2837 else_ = self._match(TokenType.ELSE) 2838 2839 if not self._match(TokenType.INTO): 2840 return None 2841 2842 return self.expression( 2843 exp.ConditionalInsert, 2844 this=self.expression( 2845 exp.Insert, 2846 this=self._parse_table(schema=True), 2847 expression=self._parse_derived_table_values(), 2848 ), 2849 expression=expression, 2850 else_=else_, 2851 ) 2852 2853 expression = parse_conditional_insert() 2854 while expression is not None: 2855 expressions.append(expression) 2856 expression = parse_conditional_insert() 2857 2858 return self.expression( 2859 exp.MultitableInserts, 2860 kind=kind, 2861 comments=comments, 2862 expressions=expressions, 2863 source=self._parse_table(), 2864 ) 2865 2866 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2867 comments = [] 2868 hint = self._parse_hint() 2869 overwrite = self._match(TokenType.OVERWRITE) 2870 ignore = self._match(TokenType.IGNORE) 2871 local = self._match_text_seq("LOCAL") 2872 alternative = None 2873 is_function = None 2874 2875 if self._match_text_seq("DIRECTORY"): 2876 this: t.Optional[exp.Expression] = self.expression( 2877 exp.Directory, 2878 this=self._parse_var_or_string(), 2879 local=local, 2880 row_format=self._parse_row_format(match_row=True), 2881 ) 2882 else: 2883 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2884 comments += ensure_list(self._prev_comments) 2885 return self._parse_multitable_inserts(comments) 2886 2887 if self._match(TokenType.OR): 2888 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2889 2890 self._match(TokenType.INTO) 2891 comments += ensure_list(self._prev_comments) 2892 self._match(TokenType.TABLE) 2893 is_function = self._match(TokenType.FUNCTION) 2894 2895 this = ( 2896 self._parse_table(schema=True, parse_partition=True) 2897 if not is_function 2898 else self._parse_function() 2899 ) 2900 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2901 this.set("alias", self._parse_table_alias()) 2902 2903 returning = self._parse_returning() 2904 2905 return self.expression( 2906 exp.Insert, 2907 comments=comments, 2908 hint=hint, 2909 is_function=is_function, 2910 this=this, 
2911 stored=self._match_text_seq("STORED") and self._parse_stored(), 2912 by_name=self._match_text_seq("BY", "NAME"), 2913 exists=self._parse_exists(), 2914 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2915 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2916 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2917 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2918 conflict=self._parse_on_conflict(), 2919 returning=returning or self._parse_returning(), 2920 overwrite=overwrite, 2921 alternative=alternative, 2922 ignore=ignore, 2923 source=self._match(TokenType.TABLE) and self._parse_table(), 2924 ) 2925 2926 def _parse_kill(self) -> exp.Kill: 2927 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2928 2929 return self.expression( 2930 exp.Kill, 2931 this=self._parse_primary(), 2932 kind=kind, 2933 ) 2934 2935 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2936 conflict = self._match_text_seq("ON", "CONFLICT") 2937 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2938 2939 if not conflict and not duplicate: 2940 return None 2941 2942 conflict_keys = None 2943 constraint = None 2944 2945 if conflict: 2946 if self._match_text_seq("ON", "CONSTRAINT"): 2947 constraint = self._parse_id_var() 2948 elif self._match(TokenType.L_PAREN): 2949 conflict_keys = self._parse_csv(self._parse_id_var) 2950 self._match_r_paren() 2951 2952 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2953 if self._prev.token_type == TokenType.UPDATE: 2954 self._match(TokenType.SET) 2955 expressions = self._parse_csv(self._parse_equality) 2956 else: 2957 expressions = None 2958 2959 return self.expression( 2960 exp.OnConflict, 2961 duplicate=duplicate, 2962 expressions=expressions, 2963 action=action, 2964 conflict_keys=conflict_keys, 2965 constraint=constraint, 2966 where=self._parse_where(), 2967 ) 2968 2969 def _parse_returning(self) -> t.Optional[exp.Returning]: 2970 if not self._match(TokenType.RETURNING): 2971 return None 2972 return self.expression( 2973 exp.Returning, 2974 expressions=self._parse_csv(self._parse_expression), 2975 into=self._match(TokenType.INTO) and self._parse_table_part(), 2976 ) 2977 2978 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2979 if not self._match(TokenType.FORMAT): 2980 return None 2981 return self._parse_row_format() 2982 2983 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2984 index = self._index 2985 with_ = with_ or self._match_text_seq("WITH") 2986 2987 if not self._match(TokenType.SERDE_PROPERTIES): 2988 self._retreat(index) 2989 return None 2990 return self.expression( 2991 exp.SerdeProperties, 2992 **{ # type: ignore 2993 "expressions": self._parse_wrapped_properties(), 2994 "with": with_, 2995 }, 2996 ) 2997 2998 def _parse_row_format( 2999 self, match_row: bool = False 3000 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3001 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3002 return None 3003 3004 if self._match_text_seq("SERDE"): 3005 this = self._parse_string() 3006 3007 serde_properties = self._parse_serde_properties() 3008 3009 return self.expression( 3010 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3011 ) 3012 3013 self._match_text_seq("DELIMITED") 3014 3015 kwargs = {} 3016 3017 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3018 kwargs["fields"] = self._parse_string() 3019 if self._match_text_seq("ESCAPED", "BY"): 3020 kwargs["escaped"] = self._parse_string() 3021 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3022 kwargs["collection_items"] = self._parse_string() 3023 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3024 kwargs["map_keys"] = self._parse_string() 3025 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3026 kwargs["lines"] = self._parse_string() 3027 if self._match_text_seq("NULL", "DEFINED", "AS"): 3028 kwargs["null"] = self._parse_string() 3029 3030 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3031 3032 def _parse_load(self) -> exp.LoadData | exp.Command: 3033 if self._match_text_seq("DATA"): 3034 local = self._match_text_seq("LOCAL") 3035 self._match_text_seq("INPATH") 3036 inpath = self._parse_string() 3037 overwrite = self._match(TokenType.OVERWRITE) 3038 self._match_pair(TokenType.INTO, TokenType.TABLE) 3039 3040 return self.expression( 3041 exp.LoadData, 3042 this=self._parse_table(schema=True), 3043 local=local, 3044 overwrite=overwrite, 3045 inpath=inpath, 3046 partition=self._parse_partition(), 3047 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3048 serde=self._match_text_seq("SERDE") and self._parse_string(), 3049 ) 3050 return self._parse_as_command(self._prev) 3051 3052 def _parse_delete(self) -> exp.Delete: 3053 # This handles MySQL's "Multiple-Table Syntax" 3054 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3055 tables = None 3056 if not self._match(TokenType.FROM, advance=False): 3057 tables = self._parse_csv(self._parse_table) or None 3058 3059 returning = self._parse_returning() 3060 3061 return self.expression( 3062 exp.Delete, 3063 tables=tables, 3064 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3065 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3066 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3067 where=self._parse_where(), 3068 returning=returning or self._parse_returning(), 3069 limit=self._parse_limit(), 3070 ) 3071 3072 def _parse_update(self) -> exp.Update: 3073 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3074 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3075 returning = self._parse_returning() 3076 return self.expression( 3077 exp.Update, 3078 **{ # type: ignore 3079 "this": this, 3080 "expressions": expressions, 3081 "from": self._parse_from(joins=True), 3082 "where": self._parse_where(), 3083 "returning": returning or self._parse_returning(), 3084 "order": self._parse_order(), 3085 "limit": self._parse_limit(), 3086 }, 3087 ) 3088 3089 def _parse_use(self) -> exp.Use: 3090 return self.expression( 3091 exp.Use, 3092 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3093 this=self._parse_table(schema=False), 3094 ) 3095 3096 def _parse_uncache(self) -> exp.Uncache: 3097 if not self._match(TokenType.TABLE): 3098 self.raise_error("Expecting TABLE after UNCACHE") 3099 3100 return self.expression( 3101 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3102 ) 3103 3104 def _parse_cache(self) -> exp.Cache: 3105 lazy = self._match_text_seq("LAZY") 3106 self._match(TokenType.TABLE) 3107 table = self._parse_table(schema=True) 3108 3109 options = [] 3110 if self._match_text_seq("OPTIONS"): 3111 self._match_l_paren() 3112 k = 
self._parse_string() 3113 self._match(TokenType.EQ) 3114 v = self._parse_string() 3115 options = [k, v] 3116 self._match_r_paren() 3117 3118 self._match(TokenType.ALIAS) 3119 return self.expression( 3120 exp.Cache, 3121 this=table, 3122 lazy=lazy, 3123 options=options, 3124 expression=self._parse_select(nested=True), 3125 ) 3126 3127 def _parse_partition(self) -> t.Optional[exp.Partition]: 3128 if not self._match_texts(self.PARTITION_KEYWORDS): 3129 return None 3130 3131 return self.expression( 3132 exp.Partition, 3133 subpartition=self._prev.text.upper() == "SUBPARTITION", 3134 expressions=self._parse_wrapped_csv(self._parse_assignment), 3135 ) 3136 3137 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3138 def _parse_value_expression() -> t.Optional[exp.Expression]: 3139 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3140 return exp.var(self._prev.text.upper()) 3141 return self._parse_expression() 3142 3143 if self._match(TokenType.L_PAREN): 3144 expressions = self._parse_csv(_parse_value_expression) 3145 self._match_r_paren() 3146 return self.expression(exp.Tuple, expressions=expressions) 3147 3148 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3149 expression = self._parse_expression() 3150 if expression: 3151 return self.expression(exp.Tuple, expressions=[expression]) 3152 return None 3153 3154 def _parse_projections(self) -> t.List[exp.Expression]: 3155 return self._parse_expressions() 3156 3157 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3158 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3159 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3160 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3161 ) 3162 elif self._match(TokenType.FROM): 3163 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3164 # Support parentheses for duckdb FROM-first syntax 3165 select = self._parse_select() 3166 if select: 3167 select.set("from", from_) 3168 this = select 3169 else: 3170 this = exp.select("*").from_(t.cast(exp.From, from_)) 3171 else: 3172 this = ( 3173 self._parse_table(consume_pipe=True) 3174 if table 3175 else self._parse_select(nested=True, parse_set_operation=False) 3176 ) 3177 3178 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3179 # in case a modifier (e.g. 
join) is following 3180 if table and isinstance(this, exp.Values) and this.alias: 3181 alias = this.args["alias"].pop() 3182 this = exp.Table(this=this, alias=alias) 3183 3184 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3185 3186 return this 3187 3188 def _parse_select( 3189 self, 3190 nested: bool = False, 3191 table: bool = False, 3192 parse_subquery_alias: bool = True, 3193 parse_set_operation: bool = True, 3194 consume_pipe: bool = True, 3195 ) -> t.Optional[exp.Expression]: 3196 query = self._parse_select_query( 3197 nested=nested, 3198 table=table, 3199 parse_subquery_alias=parse_subquery_alias, 3200 parse_set_operation=parse_set_operation, 3201 ) 3202 3203 if ( 3204 consume_pipe 3205 and self._match(TokenType.PIPE_GT, advance=False) 3206 and isinstance(query, exp.Query) 3207 ): 3208 query = self._parse_pipe_syntax_query(query) 3209 query = query.subquery(copy=False) if query and table else query 3210 3211 return query 3212 3213 def _parse_select_query( 3214 self, 3215 nested: bool = False, 3216 table: bool = False, 3217 parse_subquery_alias: bool = True, 3218 parse_set_operation: bool = True, 3219 ) -> t.Optional[exp.Expression]: 3220 cte = self._parse_with() 3221 3222 if cte: 3223 this = self._parse_statement() 3224 3225 if not this: 3226 self.raise_error("Failed to parse any statement following CTE") 3227 return cte 3228 3229 if "with" in this.arg_types: 3230 this.set("with", cte) 3231 else: 3232 self.raise_error(f"{this.key} does not support CTE") 3233 this = cte 3234 3235 return this 3236 3237 # duckdb supports leading with FROM x 3238 from_ = ( 3239 self._parse_from(consume_pipe=True) 3240 if self._match(TokenType.FROM, advance=False) 3241 else None 3242 ) 3243 3244 if self._match(TokenType.SELECT): 3245 comments = self._prev_comments 3246 3247 hint = self._parse_hint() 3248 3249 if self._next and not self._next.token_type == TokenType.DOT: 3250 all_ = self._match(TokenType.ALL) 3251 distinct = self._match_set(self.DISTINCT_TOKENS) 3252 else: 3253 all_, distinct = None, None 3254 3255 kind = ( 3256 self._match(TokenType.ALIAS) 3257 and self._match_texts(("STRUCT", "VALUE")) 3258 and self._prev.text.upper() 3259 ) 3260 3261 if distinct: 3262 distinct = self.expression( 3263 exp.Distinct, 3264 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3265 ) 3266 3267 if all_ and distinct: 3268 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3269 3270 operation_modifiers = [] 3271 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3272 operation_modifiers.append(exp.var(self._prev.text.upper())) 3273 3274 limit = self._parse_limit(top=True) 3275 projections = self._parse_projections() 3276 3277 this = self.expression( 3278 exp.Select, 3279 kind=kind, 3280 hint=hint, 3281 distinct=distinct, 3282 expressions=projections, 3283 limit=limit, 3284 operation_modifiers=operation_modifiers or None, 3285 ) 3286 this.comments = comments 3287 3288 into = self._parse_into() 3289 if into: 3290 this.set("into", into) 3291 3292 if not from_: 3293 from_ = self._parse_from() 3294 3295 if from_: 3296 this.set("from", from_) 3297 3298 this = self._parse_query_modifiers(this) 3299 elif (table or nested) and self._match(TokenType.L_PAREN): 3300 this = self._parse_wrapped_select(table=table) 3301 3302 # We return early here so that the UNION isn't attached to the subquery by the 3303 # following call to _parse_set_operations, but instead becomes the parent node 3304 self._match_r_paren() 3305 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3306 elif self._match(TokenType.VALUES, advance=False): 3307 this = self._parse_derived_table_values() 3308 elif from_: 3309 this = exp.select("*").from_(from_.this, copy=False) 3310 elif self._match(TokenType.SUMMARIZE): 3311 table = self._match(TokenType.TABLE) 3312 this = self._parse_select() or self._parse_string() or self._parse_table() 3313 return self.expression(exp.Summarize, this=this, table=table) 3314 elif self._match(TokenType.DESCRIBE): 3315 this = self._parse_describe() 3316 elif self._match_text_seq("STREAM"): 3317 this = self._parse_function() 3318 if this: 3319 this = self.expression(exp.Stream, this=this) 3320 else: 3321 self._retreat(self._index - 1) 3322 else: 3323 this = None 3324 3325 return self._parse_set_operations(this) if parse_set_operation else this 3326 3327 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3328 self._match_text_seq("SEARCH") 3329 3330 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3331 3332 if not kind: 3333 return None 3334 3335 self._match_text_seq("FIRST", "BY") 3336 3337 return self.expression( 3338 exp.RecursiveWithSearch, 3339 kind=kind, 3340 this=self._parse_id_var(), 3341 expression=self._match_text_seq("SET") and self._parse_id_var(), 3342 using=self._match_text_seq("USING") and self._parse_id_var(), 3343 ) 3344 3345 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3346 if not skip_with_token and not self._match(TokenType.WITH): 3347 return None 3348 3349 comments = self._prev_comments 3350 recursive = self._match(TokenType.RECURSIVE) 3351 3352 last_comments = None 3353 expressions = [] 3354 while True: 3355 cte = self._parse_cte() 3356 if isinstance(cte, exp.CTE): 3357 expressions.append(cte) 3358 if last_comments: 3359 cte.add_comments(last_comments) 3360 3361 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3362 break 3363 else: 3364 self._match(TokenType.WITH) 3365 3366 last_comments = self._prev_comments 3367 3368 return self.expression( 3369 exp.With, 3370 comments=comments, 3371 expressions=expressions, 3372 recursive=recursive, 3373 search=self._parse_recursive_with_search(), 3374 ) 3375 3376 def _parse_cte(self) -> t.Optional[exp.CTE]: 3377 index = self._index 3378 3379 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3380 if not alias or not alias.this: 3381 self.raise_error("Expected CTE to have alias") 3382 3383 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3384 self._retreat(index) 3385 return None 3386 3387 comments = self._prev_comments 3388 3389 if self._match_text_seq("NOT", "MATERIALIZED"): 3390 materialized = False 3391 elif self._match_text_seq("MATERIALIZED"): 3392 materialized = True 3393 else: 3394 materialized = None 3395 3396 cte = self.expression( 3397 exp.CTE, 3398 this=self._parse_wrapped(self._parse_statement), 3399 alias=alias, 3400 materialized=materialized, 3401 comments=comments, 3402 ) 3403 3404 values = cte.this 3405 if isinstance(values, exp.Values): 3406 if values.alias: 3407 cte.set("this", exp.select("*").from_(values)) 3408 else: 3409 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3410 3411 return cte 3412 3413 def _parse_table_alias( 3414 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3415 ) -> t.Optional[exp.TableAlias]: 3416 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3417 # so this section tries to parse the clause 
version and if it fails, it treats the token 3418 # as an identifier (alias) 3419 if self._can_parse_limit_or_offset(): 3420 return None 3421 3422 any_token = self._match(TokenType.ALIAS) 3423 alias = ( 3424 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3425 or self._parse_string_as_identifier() 3426 ) 3427 3428 index = self._index 3429 if self._match(TokenType.L_PAREN): 3430 columns = self._parse_csv(self._parse_function_parameter) 3431 self._match_r_paren() if columns else self._retreat(index) 3432 else: 3433 columns = None 3434 3435 if not alias and not columns: 3436 return None 3437 3438 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3439 3440 # We bubble up comments from the Identifier to the TableAlias 3441 if isinstance(alias, exp.Identifier): 3442 table_alias.add_comments(alias.pop_comments()) 3443 3444 return table_alias 3445 3446 def _parse_subquery( 3447 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3448 ) -> t.Optional[exp.Subquery]: 3449 if not this: 3450 return None 3451 3452 return self.expression( 3453 exp.Subquery, 3454 this=this, 3455 pivots=self._parse_pivots(), 3456 alias=self._parse_table_alias() if parse_alias else None, 3457 sample=self._parse_table_sample(), 3458 ) 3459 3460 def _implicit_unnests_to_explicit(self, this: E) -> E: 3461 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3462 3463 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3464 for i, join in enumerate(this.args.get("joins") or []): 3465 table = join.this 3466 normalized_table = table.copy() 3467 normalized_table.meta["maybe_column"] = True 3468 normalized_table = _norm(normalized_table, dialect=self.dialect) 3469 3470 if isinstance(table, exp.Table) and not join.args.get("on"): 3471 if normalized_table.parts[0].name in refs: 3472 table_as_column = table.to_column() 3473 unnest = exp.Unnest(expressions=[table_as_column]) 3474 3475 # Table.to_column creates a parent Alias node that we want to convert to 3476 # a TableAlias and attach to the Unnest, so it matches the parser's output 3477 if isinstance(table.args.get("alias"), exp.TableAlias): 3478 table_as_column.replace(table_as_column.this) 3479 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3480 3481 table.replace(unnest) 3482 3483 refs.add(normalized_table.alias_or_name) 3484 3485 return this 3486 3487 def _parse_query_modifiers( 3488 self, this: t.Optional[exp.Expression] 3489 ) -> t.Optional[exp.Expression]: 3490 if isinstance(this, self.MODIFIABLES): 3491 for join in self._parse_joins(): 3492 this.append("joins", join) 3493 for lateral in iter(self._parse_lateral, None): 3494 this.append("laterals", lateral) 3495 3496 while True: 3497 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3498 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3499 key, expression = parser(self) 3500 3501 if expression: 3502 this.set(key, expression) 3503 if key == "limit": 3504 offset = expression.args.pop("offset", None) 3505 3506 if offset: 3507 offset = exp.Offset(expression=offset) 3508 this.set("offset", offset) 3509 3510 limit_by_expressions = expression.expressions 3511 expression.set("expressions", None) 3512 offset.set("expressions", limit_by_expressions) 3513 continue 3514 break 3515 3516 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3517 this = self._implicit_unnests_to_explicit(this) 3518 3519 return this 3520 3521 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3522 start = self._curr 3523 while self._curr: 3524 self._advance() 3525 3526 end = self._tokens[self._index - 1] 3527 return exp.Hint(expressions=[self._find_sql(start, end)]) 3528 3529 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3530 return self._parse_function_call() 3531 3532 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3533 start_index = self._index 3534 should_fallback_to_string = False 3535 3536 hints = [] 3537 try: 3538 for hint in iter( 3539 lambda: self._parse_csv( 3540 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3541 ), 3542 [], 3543 ): 3544 hints.extend(hint) 3545 except ParseError: 3546 should_fallback_to_string = True 3547 3548 if should_fallback_to_string or self._curr: 3549 self._retreat(start_index) 3550 return self._parse_hint_fallback_to_string() 3551 3552 return self.expression(exp.Hint, expressions=hints) 3553 3554 def _parse_hint(self) -> t.Optional[exp.Hint]: 3555 if self._match(TokenType.HINT) and self._prev_comments: 3556 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3557 3558 return None 3559 3560 def _parse_into(self) -> t.Optional[exp.Into]: 3561 if not self._match(TokenType.INTO): 3562 return None 3563 3564 temp = self._match(TokenType.TEMPORARY) 3565 unlogged = self._match_text_seq("UNLOGGED") 3566 self._match(TokenType.TABLE) 3567 3568 return self.expression( 3569 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3570 ) 3571 3572 def _parse_from( 3573 self, 3574 joins: bool = False, 3575 skip_from_token: bool = False, 3576 consume_pipe: bool = False, 3577 ) -> t.Optional[exp.From]: 3578 if not skip_from_token and not self._match(TokenType.FROM): 3579 return None 3580 3581 return self.expression( 3582 exp.From, 3583 comments=self._prev_comments, 3584 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3585 ) 3586 3587 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3588 return self.expression( 3589 exp.MatchRecognizeMeasure, 3590 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3591 this=self._parse_expression(), 3592 ) 3593 3594 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3595 if not self._match(TokenType.MATCH_RECOGNIZE): 3596 return None 3597 3598 self._match_l_paren() 3599 3600 partition = self._parse_partition_by() 3601 order = self._parse_order() 3602 3603 measures = ( 3604 self._parse_csv(self._parse_match_recognize_measure) 3605 if self._match_text_seq("MEASURES") 3606 else None 3607 ) 3608 3609 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3610 rows = exp.var("ONE ROW PER MATCH") 3611 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3612 text = "ALL ROWS PER MATCH" 3613 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3614 text += " SHOW EMPTY MATCHES" 3615 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3616 text += " OMIT EMPTY MATCHES" 3617 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3618 text += " WITH UNMATCHED ROWS" 3619 rows = exp.var(text) 3620 else: 3621 rows = None 3622 3623 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3624 text = "AFTER MATCH SKIP" 3625 if self._match_text_seq("PAST", "LAST", "ROW"): 3626 text += " PAST LAST ROW" 3627 elif self._match_text_seq("TO", "NEXT", "ROW"): 3628 text += " TO NEXT ROW" 3629 elif self._match_text_seq("TO", "FIRST"): 3630 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3631 elif self._match_text_seq("TO", "LAST"): 3632 text += f" TO LAST {self._advance_any().text}" # type: ignore 3633 after = exp.var(text) 3634 else: 3635 after = None 3636 3637 if self._match_text_seq("PATTERN"): 3638 self._match_l_paren() 3639 3640 if not self._curr: 3641 self.raise_error("Expecting )", self._curr) 3642 3643 paren = 1 3644 start = self._curr 3645 3646 while self._curr and paren > 0: 3647 if self._curr.token_type == TokenType.L_PAREN: 3648 paren += 1 3649 if self._curr.token_type == TokenType.R_PAREN: 3650 paren -= 1 3651 3652 end = self._prev 3653 self._advance() 3654 3655 if paren > 0: 3656 self.raise_error("Expecting )", self._curr) 3657 3658 pattern = exp.var(self._find_sql(start, end)) 3659 else: 3660 pattern = None 3661 3662 define = ( 3663 self._parse_csv(self._parse_name_as_expression) 3664 if self._match_text_seq("DEFINE") 3665 else None 3666 ) 3667 3668 self._match_r_paren() 3669 3670 return self.expression( 3671 exp.MatchRecognize, 3672 partition_by=partition, 3673 order=order, 3674 measures=measures, 3675 rows=rows, 3676 after=after, 3677 pattern=pattern, 3678 define=define, 3679 alias=self._parse_table_alias(), 3680 ) 3681 3682 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3683 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3684 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3685 cross_apply = False 3686 3687 if cross_apply is not None: 3688 this = self._parse_select(table=True) 3689 view = None 3690 outer = None 3691 elif self._match(TokenType.LATERAL): 3692 this = self._parse_select(table=True) 3693 view = self._match(TokenType.VIEW) 3694 outer = self._match(TokenType.OUTER) 3695 else: 3696 return None 3697 3698 if not this: 3699 this = ( 3700 self._parse_unnest() 3701 or self._parse_function() 3702 or self._parse_id_var(any_token=False) 3703 ) 3704 3705 while self._match(TokenType.DOT): 3706 this = exp.Dot( 3707 this=this, 3708 expression=self._parse_function() or self._parse_id_var(any_token=False), 3709 ) 3710 3711 ordinality: t.Optional[bool] = None 3712 3713 if view: 3714 table = self._parse_id_var(any_token=False) 3715 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3716 table_alias: t.Optional[exp.TableAlias] = self.expression( 3717 exp.TableAlias, this=table, columns=columns 3718 ) 3719 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3720 # We move the alias from the lateral's child node to the lateral itself 3721 table_alias = this.args["alias"].pop() 3722 else: 3723 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3724 table_alias = self._parse_table_alias() 3725 3726 return self.expression( 3727 exp.Lateral, 3728 this=this, 3729 view=view, 3730 outer=outer, 3731 alias=table_alias, 3732 cross_apply=cross_apply, 3733 ordinality=ordinality, 3734 ) 3735 3736 def _parse_join_parts( 3737 self, 3738 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3739 return ( 3740 self._match_set(self.JOIN_METHODS) and self._prev, 3741 self._match_set(self.JOIN_SIDES) and self._prev, 3742 self._match_set(self.JOIN_KINDS) and self._prev, 3743 ) 3744 3745 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3746 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3747 this = self._parse_column() 3748 if isinstance(this, exp.Column): 3749 return this.this 3750 return this 3751 3752 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3753 3754 def _parse_join( 3755 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3756 ) -> t.Optional[exp.Join]: 3757 if self._match(TokenType.COMMA): 3758 table = self._try_parse(self._parse_table) 3759 cross_join = self.expression(exp.Join, this=table) if table else None 3760 3761 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3762 cross_join.set("kind", "CROSS") 3763 3764 return cross_join 3765 3766 index = self._index 3767 method, side, kind = self._parse_join_parts() 3768 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3769 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3770 join_comments = self._prev_comments 3771 3772 if not skip_join_token and not join: 3773 self._retreat(index) 3774 kind = None 3775 method = None 3776 side = None 3777 3778 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3779 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3780 3781 if not skip_join_token and not join and not outer_apply and not cross_apply: 3782 return None 3783 3784 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3785 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3786 kwargs["expressions"] = self._parse_csv( 3787 lambda: self._parse_table(parse_bracket=parse_bracket) 3788 ) 3789 3790 if method: 3791 kwargs["method"] = method.text 3792 if side: 3793 kwargs["side"] = side.text 3794 if kind: 3795 kwargs["kind"] = kind.text 3796 if hint: 3797 kwargs["hint"] = hint 3798 3799 if self._match(TokenType.MATCH_CONDITION): 3800 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3801 3802 if self._match(TokenType.ON): 3803 kwargs["on"] = self._parse_assignment() 3804 elif self._match(TokenType.USING): 3805 kwargs["using"] = self._parse_using_identifiers() 3806 elif ( 3807 not (outer_apply or cross_apply) 3808 and not isinstance(kwargs["this"], exp.Unnest) 3809 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3810 ): 3811 index = self._index 3812 joins: t.Optional[list] = list(self._parse_joins()) 3813 3814 if joins and self._match(TokenType.ON): 3815 kwargs["on"] = self._parse_assignment() 3816 elif joins and self._match(TokenType.USING): 3817 kwargs["using"] = self._parse_using_identifiers() 3818 else: 3819 joins = None 3820 self._retreat(index) 3821 3822 kwargs["this"].set("joins", joins if joins else None) 3823 3824 kwargs["pivots"] = self._parse_pivots() 3825 3826 comments = [c for token in (method, side, kind) if token for c in token.comments] 3827 comments = (join_comments or []) + comments 3828 return self.expression(exp.Join, comments=comments, **kwargs) 3829 3830 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3831 this = self._parse_assignment() 3832 3833 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3834 return this 3835 3836 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3837 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3838 3839 return this 3840 3841 def _parse_index_params(self) -> exp.IndexParameters: 3842 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3843 3844 if self._match(TokenType.L_PAREN, advance=False): 3845 columns = self._parse_wrapped_csv(self._parse_with_operator) 3846 else: 3847 columns = None 3848 3849 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3850 partition_by = self._parse_partition_by() 3851 
with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3852 tablespace = ( 3853 self._parse_var(any_token=True) 3854 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3855 else None 3856 ) 3857 where = self._parse_where() 3858 3859 on = self._parse_field() if self._match(TokenType.ON) else None 3860 3861 return self.expression( 3862 exp.IndexParameters, 3863 using=using, 3864 columns=columns, 3865 include=include, 3866 partition_by=partition_by, 3867 where=where, 3868 with_storage=with_storage, 3869 tablespace=tablespace, 3870 on=on, 3871 ) 3872 3873 def _parse_index( 3874 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3875 ) -> t.Optional[exp.Index]: 3876 if index or anonymous: 3877 unique = None 3878 primary = None 3879 amp = None 3880 3881 self._match(TokenType.ON) 3882 self._match(TokenType.TABLE) # hive 3883 table = self._parse_table_parts(schema=True) 3884 else: 3885 unique = self._match(TokenType.UNIQUE) 3886 primary = self._match_text_seq("PRIMARY") 3887 amp = self._match_text_seq("AMP") 3888 3889 if not self._match(TokenType.INDEX): 3890 return None 3891 3892 index = self._parse_id_var() 3893 table = None 3894 3895 params = self._parse_index_params() 3896 3897 return self.expression( 3898 exp.Index, 3899 this=index, 3900 table=table, 3901 unique=unique, 3902 primary=primary, 3903 amp=amp, 3904 params=params, 3905 ) 3906 3907 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3908 hints: t.List[exp.Expression] = [] 3909 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3910 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3911 hints.append( 3912 self.expression( 3913 exp.WithTableHint, 3914 expressions=self._parse_csv( 3915 lambda: self._parse_function() or self._parse_var(any_token=True) 3916 ), 3917 ) 3918 ) 3919 self._match_r_paren() 3920 else: 3921 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3922 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3923 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3924 3925 self._match_set((TokenType.INDEX, TokenType.KEY)) 3926 if self._match(TokenType.FOR): 3927 hint.set("target", self._advance_any() and self._prev.text.upper()) 3928 3929 hint.set("expressions", self._parse_wrapped_id_vars()) 3930 hints.append(hint) 3931 3932 return hints or None 3933 3934 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3935 return ( 3936 (not schema and self._parse_function(optional_parens=False)) 3937 or self._parse_id_var(any_token=False) 3938 or self._parse_string_as_identifier() 3939 or self._parse_placeholder() 3940 ) 3941 3942 def _parse_table_parts( 3943 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3944 ) -> exp.Table: 3945 catalog = None 3946 db = None 3947 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3948 3949 while self._match(TokenType.DOT): 3950 if catalog: 3951 # This allows nesting the table in arbitrarily many dot expressions if needed 3952 table = self.expression( 3953 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3954 ) 3955 else: 3956 catalog = db 3957 db = table 3958 # "" used for tsql FROM a..b case 3959 table = self._parse_table_part(schema=schema) or "" 3960 3961 if ( 3962 wildcard 3963 and self._is_connected() 3964 and (isinstance(table, exp.Identifier) or not table) 3965 and self._match(TokenType.STAR) 3966 ): 3967 if isinstance(table, exp.Identifier): 
3968 table.args["this"] += "*" 3969 else: 3970 table = exp.Identifier(this="*") 3971 3972 # We bubble up comments from the Identifier to the Table 3973 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3974 3975 if is_db_reference: 3976 catalog = db 3977 db = table 3978 table = None 3979 3980 if not table and not is_db_reference: 3981 self.raise_error(f"Expected table name but got {self._curr}") 3982 if not db and is_db_reference: 3983 self.raise_error(f"Expected database name but got {self._curr}") 3984 3985 table = self.expression( 3986 exp.Table, 3987 comments=comments, 3988 this=table, 3989 db=db, 3990 catalog=catalog, 3991 ) 3992 3993 changes = self._parse_changes() 3994 if changes: 3995 table.set("changes", changes) 3996 3997 at_before = self._parse_historical_data() 3998 if at_before: 3999 table.set("when", at_before) 4000 4001 pivots = self._parse_pivots() 4002 if pivots: 4003 table.set("pivots", pivots) 4004 4005 return table 4006 4007 def _parse_table( 4008 self, 4009 schema: bool = False, 4010 joins: bool = False, 4011 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4012 parse_bracket: bool = False, 4013 is_db_reference: bool = False, 4014 parse_partition: bool = False, 4015 consume_pipe: bool = False, 4016 ) -> t.Optional[exp.Expression]: 4017 lateral = self._parse_lateral() 4018 if lateral: 4019 return lateral 4020 4021 unnest = self._parse_unnest() 4022 if unnest: 4023 return unnest 4024 4025 values = self._parse_derived_table_values() 4026 if values: 4027 return values 4028 4029 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4030 if subquery: 4031 if not subquery.args.get("pivots"): 4032 subquery.set("pivots", self._parse_pivots()) 4033 return subquery 4034 4035 bracket = parse_bracket and self._parse_bracket(None) 4036 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4037 4038 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4039 self._parse_table 4040 ) 4041 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4042 4043 only = self._match(TokenType.ONLY) 4044 4045 this = t.cast( 4046 exp.Expression, 4047 bracket 4048 or rows_from 4049 or self._parse_bracket( 4050 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4051 ), 4052 ) 4053 4054 if only: 4055 this.set("only", only) 4056 4057 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4058 self._match_text_seq("*") 4059 4060 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4061 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4062 this.set("partition", self._parse_partition()) 4063 4064 if schema: 4065 return self._parse_schema(this=this) 4066 4067 version = self._parse_version() 4068 4069 if version: 4070 this.set("version", version) 4071 4072 if self.dialect.ALIAS_POST_TABLESAMPLE: 4073 this.set("sample", self._parse_table_sample()) 4074 4075 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4076 if alias: 4077 this.set("alias", alias) 4078 4079 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4080 return self.expression( 4081 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4082 ) 4083 4084 this.set("hints", self._parse_table_hints()) 4085 4086 if not this.args.get("pivots"): 4087 this.set("pivots", self._parse_pivots()) 4088 4089 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4090 this.set("sample", 
self._parse_table_sample()) 4091 4092 if joins: 4093 for join in self._parse_joins(): 4094 this.append("joins", join) 4095 4096 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4097 this.set("ordinality", True) 4098 this.set("alias", self._parse_table_alias()) 4099 4100 return this 4101 4102 def _parse_version(self) -> t.Optional[exp.Version]: 4103 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4104 this = "TIMESTAMP" 4105 elif self._match(TokenType.VERSION_SNAPSHOT): 4106 this = "VERSION" 4107 else: 4108 return None 4109 4110 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4111 kind = self._prev.text.upper() 4112 start = self._parse_bitwise() 4113 self._match_texts(("TO", "AND")) 4114 end = self._parse_bitwise() 4115 expression: t.Optional[exp.Expression] = self.expression( 4116 exp.Tuple, expressions=[start, end] 4117 ) 4118 elif self._match_text_seq("CONTAINED", "IN"): 4119 kind = "CONTAINED IN" 4120 expression = self.expression( 4121 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4122 ) 4123 elif self._match(TokenType.ALL): 4124 kind = "ALL" 4125 expression = None 4126 else: 4127 self._match_text_seq("AS", "OF") 4128 kind = "AS OF" 4129 expression = self._parse_type() 4130 4131 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4132 4133 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4134 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4135 index = self._index 4136 historical_data = None 4137 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4138 this = self._prev.text.upper() 4139 kind = ( 4140 self._match(TokenType.L_PAREN) 4141 and self._match_texts(self.HISTORICAL_DATA_KIND) 4142 and self._prev.text.upper() 4143 ) 4144 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4145 4146 if expression: 4147 self._match_r_paren() 4148 historical_data = self.expression( 4149 exp.HistoricalData, this=this, kind=kind, expression=expression 4150 ) 4151 else: 4152 self._retreat(index) 4153 4154 return historical_data 4155 4156 def _parse_changes(self) -> t.Optional[exp.Changes]: 4157 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4158 return None 4159 4160 information = self._parse_var(any_token=True) 4161 self._match_r_paren() 4162 4163 return self.expression( 4164 exp.Changes, 4165 information=information, 4166 at_before=self._parse_historical_data(), 4167 end=self._parse_historical_data(), 4168 ) 4169 4170 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4171 if not self._match(TokenType.UNNEST): 4172 return None 4173 4174 expressions = self._parse_wrapped_csv(self._parse_equality) 4175 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4176 4177 alias = self._parse_table_alias() if with_alias else None 4178 4179 if alias: 4180 if self.dialect.UNNEST_COLUMN_ONLY: 4181 if alias.args.get("columns"): 4182 self.raise_error("Unexpected extra column alias in unnest.") 4183 4184 alias.set("columns", [alias.this]) 4185 alias.set("this", None) 4186 4187 columns = alias.args.get("columns") or [] 4188 if offset and len(expressions) < len(columns): 4189 offset = columns.pop() 4190 4191 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4192 self._match(TokenType.ALIAS) 4193 offset = self._parse_id_var( 4194 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4195 ) or exp.to_identifier("offset") 4196 4197 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4198 
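    # --- Editor's note: illustrative sketch, not part of the upstream sqlglot source.
    # The UNNEST handling above can be observed through the public API; the exact
    # tree shape described here is an assumption based on _parse_unnest:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   unnest = sqlglot.parse_one(
    #       "SELECT x FROM UNNEST(arr) AS x WITH OFFSET AS pos", read="bigquery"
    #   ).find(exp.Unnest)
    #   # unnest.expressions should hold the parsed `arr` column, and
    #   # unnest.args["offset"] the `pos` identifier captured by WITH OFFSET above.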
4199 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4200 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4201 if not is_derived and not ( 4202 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4203 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4204 ): 4205 return None 4206 4207 expressions = self._parse_csv(self._parse_value) 4208 alias = self._parse_table_alias() 4209 4210 if is_derived: 4211 self._match_r_paren() 4212 4213 return self.expression( 4214 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4215 ) 4216 4217 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4218 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4219 as_modifier and self._match_text_seq("USING", "SAMPLE") 4220 ): 4221 return None 4222 4223 bucket_numerator = None 4224 bucket_denominator = None 4225 bucket_field = None 4226 percent = None 4227 size = None 4228 seed = None 4229 4230 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4231 matched_l_paren = self._match(TokenType.L_PAREN) 4232 4233 if self.TABLESAMPLE_CSV: 4234 num = None 4235 expressions = self._parse_csv(self._parse_primary) 4236 else: 4237 expressions = None 4238 num = ( 4239 self._parse_factor() 4240 if self._match(TokenType.NUMBER, advance=False) 4241 else self._parse_primary() or self._parse_placeholder() 4242 ) 4243 4244 if self._match_text_seq("BUCKET"): 4245 bucket_numerator = self._parse_number() 4246 self._match_text_seq("OUT", "OF") 4247 bucket_denominator = bucket_denominator = self._parse_number() 4248 self._match(TokenType.ON) 4249 bucket_field = self._parse_field() 4250 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4251 percent = num 4252 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4253 size = num 4254 else: 4255 percent = num 4256 4257 if matched_l_paren: 4258 self._match_r_paren() 4259 4260 if self._match(TokenType.L_PAREN): 4261 method = self._parse_var(upper=True) 4262 seed = self._match(TokenType.COMMA) and self._parse_number() 4263 self._match_r_paren() 4264 elif self._match_texts(("SEED", "REPEATABLE")): 4265 seed = self._parse_wrapped(self._parse_number) 4266 4267 if not method and self.DEFAULT_SAMPLING_METHOD: 4268 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4269 4270 return self.expression( 4271 exp.TableSample, 4272 expressions=expressions, 4273 method=method, 4274 bucket_numerator=bucket_numerator, 4275 bucket_denominator=bucket_denominator, 4276 bucket_field=bucket_field, 4277 percent=percent, 4278 size=size, 4279 seed=seed, 4280 ) 4281 4282 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4283 return list(iter(self._parse_pivot, None)) or None 4284 4285 def _parse_joins(self) -> t.Iterator[exp.Join]: 4286 return iter(self._parse_join, None) 4287 4288 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4289 if not self._match(TokenType.INTO): 4290 return None 4291 4292 return self.expression( 4293 exp.UnpivotColumns, 4294 this=self._match_text_seq("NAME") and self._parse_column(), 4295 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4296 ) 4297 4298 # https://duckdb.org/docs/sql/statements/pivot 4299 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4300 def _parse_on() -> t.Optional[exp.Expression]: 4301 this = self._parse_bitwise() 4302 4303 if self._match(TokenType.IN): 4304 # PIVOT ... 
ON col IN (row_val1, row_val2) 4305 return self._parse_in(this) 4306 if self._match(TokenType.ALIAS, advance=False): 4307 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4308 return self._parse_alias(this) 4309 4310 return this 4311 4312 this = self._parse_table() 4313 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4314 into = self._parse_unpivot_columns() 4315 using = self._match(TokenType.USING) and self._parse_csv( 4316 lambda: self._parse_alias(self._parse_function()) 4317 ) 4318 group = self._parse_group() 4319 4320 return self.expression( 4321 exp.Pivot, 4322 this=this, 4323 expressions=expressions, 4324 using=using, 4325 group=group, 4326 unpivot=is_unpivot, 4327 into=into, 4328 ) 4329 4330 def _parse_pivot_in(self) -> exp.In: 4331 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4332 this = self._parse_select_or_expression() 4333 4334 self._match(TokenType.ALIAS) 4335 alias = self._parse_bitwise() 4336 if alias: 4337 if isinstance(alias, exp.Column) and not alias.db: 4338 alias = alias.this 4339 return self.expression(exp.PivotAlias, this=this, alias=alias) 4340 4341 return this 4342 4343 value = self._parse_column() 4344 4345 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4346 self.raise_error("Expecting IN (") 4347 4348 if self._match(TokenType.ANY): 4349 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4350 else: 4351 exprs = self._parse_csv(_parse_aliased_expression) 4352 4353 self._match_r_paren() 4354 return self.expression(exp.In, this=value, expressions=exprs) 4355 4356 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4357 func = self._parse_function() 4358 if not func: 4359 self.raise_error("Expecting an aggregation function in PIVOT") 4360 4361 return self._parse_alias(func) 4362 4363 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4364 index = self._index 4365 include_nulls = None 4366 4367 if self._match(TokenType.PIVOT): 4368 unpivot = False 4369 elif self._match(TokenType.UNPIVOT): 4370 unpivot = True 4371 4372 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4373 if self._match_text_seq("INCLUDE", "NULLS"): 4374 include_nulls = True 4375 elif self._match_text_seq("EXCLUDE", "NULLS"): 4376 include_nulls = False 4377 else: 4378 return None 4379 4380 expressions = [] 4381 4382 if not self._match(TokenType.L_PAREN): 4383 self._retreat(index) 4384 return None 4385 4386 if unpivot: 4387 expressions = self._parse_csv(self._parse_column) 4388 else: 4389 expressions = self._parse_csv(self._parse_pivot_aggregation) 4390 4391 if not expressions: 4392 self.raise_error("Failed to parse PIVOT's aggregation list") 4393 4394 if not self._match(TokenType.FOR): 4395 self.raise_error("Expecting FOR") 4396 4397 fields = [] 4398 while True: 4399 field = self._try_parse(self._parse_pivot_in) 4400 if not field: 4401 break 4402 fields.append(field) 4403 4404 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4405 self._parse_bitwise 4406 ) 4407 4408 group = self._parse_group() 4409 4410 self._match_r_paren() 4411 4412 pivot = self.expression( 4413 exp.Pivot, 4414 expressions=expressions, 4415 fields=fields, 4416 unpivot=unpivot, 4417 include_nulls=include_nulls, 4418 default_on_null=default_on_null, 4419 group=group, 4420 ) 4421 4422 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4423 pivot.set("alias", self._parse_table_alias()) 4424 4425 if not unpivot: 4426 names = 
self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4427 4428 columns: t.List[exp.Expression] = [] 4429 all_fields = [] 4430 for pivot_field in pivot.fields: 4431 pivot_field_expressions = pivot_field.expressions 4432 4433 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4434 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4435 continue 4436 4437 all_fields.append( 4438 [ 4439 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4440 for fld in pivot_field_expressions 4441 ] 4442 ) 4443 4444 if all_fields: 4445 if names: 4446 all_fields.append(names) 4447 4448 # Generate all possible combinations of the pivot columns 4449 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4450 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4451 for fld_parts_tuple in itertools.product(*all_fields): 4452 fld_parts = list(fld_parts_tuple) 4453 4454 if names and self.PREFIXED_PIVOT_COLUMNS: 4455 # Move the "name" to the front of the list 4456 fld_parts.insert(0, fld_parts.pop(-1)) 4457 4458 columns.append(exp.to_identifier("_".join(fld_parts))) 4459 4460 pivot.set("columns", columns) 4461 4462 return pivot 4463 4464 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4465 return [agg.alias for agg in aggregations if agg.alias] 4466 4467 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4468 if not skip_where_token and not self._match(TokenType.PREWHERE): 4469 return None 4470 4471 return self.expression( 4472 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4473 ) 4474 4475 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4476 if not skip_where_token and not self._match(TokenType.WHERE): 4477 return None 4478 4479 return self.expression( 4480 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4481 ) 4482 4483 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4484 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4485 return None 4486 comments = self._prev_comments 4487 4488 elements: t.Dict[str, t.Any] = defaultdict(list) 4489 4490 if self._match(TokenType.ALL): 4491 elements["all"] = True 4492 elif self._match(TokenType.DISTINCT): 4493 elements["all"] = False 4494 4495 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4496 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4497 4498 while True: 4499 index = self._index 4500 4501 elements["expressions"].extend( 4502 self._parse_csv( 4503 lambda: None 4504 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4505 else self._parse_assignment() 4506 ) 4507 ) 4508 4509 before_with_index = self._index 4510 with_prefix = self._match(TokenType.WITH) 4511 4512 if self._match(TokenType.ROLLUP): 4513 elements["rollup"].append( 4514 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4515 ) 4516 elif self._match(TokenType.CUBE): 4517 elements["cube"].append( 4518 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4519 ) 4520 elif self._match(TokenType.GROUPING_SETS): 4521 elements["grouping_sets"].append( 4522 self.expression( 4523 exp.GroupingSets, 4524 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4525 ) 4526 ) 4527 elif self._match_text_seq("TOTALS"): 4528 elements["totals"] = True # type: ignore 4529 4530 if 
before_with_index <= self._index <= before_with_index + 1: 4531 self._retreat(before_with_index) 4532 break 4533 4534 if index == self._index: 4535 break 4536 4537 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4538 4539 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4540 return self.expression( 4541 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4542 ) 4543 4544 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4545 if self._match(TokenType.L_PAREN): 4546 grouping_set = self._parse_csv(self._parse_column) 4547 self._match_r_paren() 4548 return self.expression(exp.Tuple, expressions=grouping_set) 4549 4550 return self._parse_column() 4551 4552 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4553 if not skip_having_token and not self._match(TokenType.HAVING): 4554 return None 4555 return self.expression( 4556 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4557 ) 4558 4559 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4560 if not self._match(TokenType.QUALIFY): 4561 return None 4562 return self.expression(exp.Qualify, this=self._parse_assignment()) 4563 4564 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4565 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4566 exp.Prior, this=self._parse_bitwise() 4567 ) 4568 connect = self._parse_assignment() 4569 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4570 return connect 4571 4572 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4573 if skip_start_token: 4574 start = None 4575 elif self._match(TokenType.START_WITH): 4576 start = self._parse_assignment() 4577 else: 4578 return None 4579 4580 self._match(TokenType.CONNECT_BY) 4581 nocycle = self._match_text_seq("NOCYCLE") 4582 connect = self._parse_connect_with_prior() 4583 4584 if not start and self._match(TokenType.START_WITH): 4585 start = self._parse_assignment() 4586 4587 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4588 4589 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4590 this = self._parse_id_var(any_token=True) 4591 if self._match(TokenType.ALIAS): 4592 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4593 return this 4594 4595 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4596 if self._match_text_seq("INTERPOLATE"): 4597 return self._parse_wrapped_csv(self._parse_name_as_expression) 4598 return None 4599 4600 def _parse_order( 4601 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4602 ) -> t.Optional[exp.Expression]: 4603 siblings = None 4604 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4605 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4606 return this 4607 4608 siblings = True 4609 4610 return self.expression( 4611 exp.Order, 4612 comments=self._prev_comments, 4613 this=this, 4614 expressions=self._parse_csv(self._parse_ordered), 4615 siblings=siblings, 4616 ) 4617 4618 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4619 if not self._match(token): 4620 return None 4621 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4622 4623 def _parse_ordered( 4624 self, parse_method: t.Optional[t.Callable] = None 4625 ) -> t.Optional[exp.Ordered]: 4626 this = parse_method() if parse_method else 
self._parse_assignment() 4627 if not this: 4628 return None 4629 4630 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4631 this = exp.var("ALL") 4632 4633 asc = self._match(TokenType.ASC) 4634 desc = self._match(TokenType.DESC) or (asc and False) 4635 4636 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4637 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4638 4639 nulls_first = is_nulls_first or False 4640 explicitly_null_ordered = is_nulls_first or is_nulls_last 4641 4642 if ( 4643 not explicitly_null_ordered 4644 and ( 4645 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4646 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4647 ) 4648 and self.dialect.NULL_ORDERING != "nulls_are_last" 4649 ): 4650 nulls_first = True 4651 4652 if self._match_text_seq("WITH", "FILL"): 4653 with_fill = self.expression( 4654 exp.WithFill, 4655 **{ # type: ignore 4656 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4657 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4658 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4659 "interpolate": self._parse_interpolate(), 4660 }, 4661 ) 4662 else: 4663 with_fill = None 4664 4665 return self.expression( 4666 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4667 ) 4668 4669 def _parse_limit_options(self) -> exp.LimitOptions: 4670 percent = self._match(TokenType.PERCENT) 4671 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4672 self._match_text_seq("ONLY") 4673 with_ties = self._match_text_seq("WITH", "TIES") 4674 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4675 4676 def _parse_limit( 4677 self, 4678 this: t.Optional[exp.Expression] = None, 4679 top: bool = False, 4680 skip_limit_token: bool = False, 4681 ) -> t.Optional[exp.Expression]: 4682 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4683 comments = self._prev_comments 4684 if top: 4685 limit_paren = self._match(TokenType.L_PAREN) 4686 expression = self._parse_term() if limit_paren else self._parse_number() 4687 4688 if limit_paren: 4689 self._match_r_paren() 4690 4691 limit_options = self._parse_limit_options() 4692 else: 4693 limit_options = None 4694 expression = self._parse_term() 4695 4696 if self._match(TokenType.COMMA): 4697 offset = expression 4698 expression = self._parse_term() 4699 else: 4700 offset = None 4701 4702 limit_exp = self.expression( 4703 exp.Limit, 4704 this=this, 4705 expression=expression, 4706 offset=offset, 4707 comments=comments, 4708 limit_options=limit_options, 4709 expressions=self._parse_limit_by(), 4710 ) 4711 4712 return limit_exp 4713 4714 if self._match(TokenType.FETCH): 4715 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4716 direction = self._prev.text.upper() if direction else "FIRST" 4717 4718 count = self._parse_field(tokens=self.FETCH_TOKENS) 4719 4720 return self.expression( 4721 exp.Fetch, 4722 direction=direction, 4723 count=count, 4724 limit_options=self._parse_limit_options(), 4725 ) 4726 4727 return this 4728 4729 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4730 if not self._match(TokenType.OFFSET): 4731 return this 4732 4733 count = self._parse_term() 4734 self._match_set((TokenType.ROW, TokenType.ROWS)) 4735 4736 return self.expression( 4737 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4738 ) 4739 4740 def _can_parse_limit_or_offset(self) -> 
bool: 4741 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4742 return False 4743 4744 index = self._index 4745 result = bool( 4746 self._try_parse(self._parse_limit, retreat=True) 4747 or self._try_parse(self._parse_offset, retreat=True) 4748 ) 4749 self._retreat(index) 4750 return result 4751 4752 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4753 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4754 4755 def _parse_locks(self) -> t.List[exp.Lock]: 4756 locks = [] 4757 while True: 4758 update, key = None, None 4759 if self._match_text_seq("FOR", "UPDATE"): 4760 update = True 4761 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4762 "LOCK", "IN", "SHARE", "MODE" 4763 ): 4764 update = False 4765 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4766 update, key = False, True 4767 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4768 update, key = True, True 4769 else: 4770 break 4771 4772 expressions = None 4773 if self._match_text_seq("OF"): 4774 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4775 4776 wait: t.Optional[bool | exp.Expression] = None 4777 if self._match_text_seq("NOWAIT"): 4778 wait = True 4779 elif self._match_text_seq("WAIT"): 4780 wait = self._parse_primary() 4781 elif self._match_text_seq("SKIP", "LOCKED"): 4782 wait = False 4783 4784 locks.append( 4785 self.expression( 4786 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4787 ) 4788 ) 4789 4790 return locks 4791 4792 def parse_set_operation( 4793 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4794 ) -> t.Optional[exp.Expression]: 4795 start = self._index 4796 _, side_token, kind_token = self._parse_join_parts() 4797 4798 side = side_token.text if side_token else None 4799 kind = kind_token.text if kind_token else None 4800 4801 if not self._match_set(self.SET_OPERATIONS): 4802 self._retreat(start) 4803 return None 4804 4805 token_type = self._prev.token_type 4806 4807 if token_type == TokenType.UNION: 4808 operation: t.Type[exp.SetOperation] = exp.Union 4809 elif token_type == TokenType.EXCEPT: 4810 operation = exp.Except 4811 else: 4812 operation = exp.Intersect 4813 4814 comments = self._prev.comments 4815 4816 if self._match(TokenType.DISTINCT): 4817 distinct: t.Optional[bool] = True 4818 elif self._match(TokenType.ALL): 4819 distinct = False 4820 else: 4821 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4822 if distinct is None: 4823 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4824 4825 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4826 "STRICT", "CORRESPONDING" 4827 ) 4828 if self._match_text_seq("CORRESPONDING"): 4829 by_name = True 4830 if not side and not kind: 4831 kind = "INNER" 4832 4833 on_column_list = None 4834 if by_name and self._match_texts(("ON", "BY")): 4835 on_column_list = self._parse_wrapped_csv(self._parse_column) 4836 4837 expression = self._parse_select( 4838 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4839 ) 4840 4841 return self.expression( 4842 operation, 4843 comments=comments, 4844 this=this, 4845 distinct=distinct, 4846 by_name=by_name, 4847 expression=expression, 4848 side=side, 4849 kind=kind, 4850 on=on_column_list, 4851 ) 4852 4853 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4854 while this: 4855 setop = self.parse_set_operation(this) 4856 if not setop: 4857 break 4858 this = setop 4859 4860 
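        # --- Editor's note: illustrative sketch, not part of the upstream sqlglot source.
        # The loop above folds chained set operators left-associatively, so
        # "SELECT 1 UNION SELECT 2 UNION ALL SELECT 3" should come back as
        # Union(this=Union(...), expression=SELECT 3): the outer node carries
        # distinct=False (from ALL), the inner one the dialect default taken from
        # SET_OP_DISTINCT_BY_DEFAULT in parse_set_operation above. For example:
        #
        #   import sqlglot
        #   from sqlglot import exp
        #
        #   outer = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION ALL SELECT 3")
        #   # outer is expected to be an exp.Union whose `this` is itself an exp.Union.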
if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4861 expression = this.expression 4862 4863 if expression: 4864 for arg in self.SET_OP_MODIFIERS: 4865 expr = expression.args.get(arg) 4866 if expr: 4867 this.set(arg, expr.pop()) 4868 4869 return this 4870 4871 def _parse_expression(self) -> t.Optional[exp.Expression]: 4872 return self._parse_alias(self._parse_assignment()) 4873 4874 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4875 this = self._parse_disjunction() 4876 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4877 # This allows us to parse <non-identifier token> := <expr> 4878 this = exp.column( 4879 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4880 ) 4881 4882 while self._match_set(self.ASSIGNMENT): 4883 if isinstance(this, exp.Column) and len(this.parts) == 1: 4884 this = this.this 4885 4886 this = self.expression( 4887 self.ASSIGNMENT[self._prev.token_type], 4888 this=this, 4889 comments=self._prev_comments, 4890 expression=self._parse_assignment(), 4891 ) 4892 4893 return this 4894 4895 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4896 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4897 4898 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4899 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4900 4901 def _parse_equality(self) -> t.Optional[exp.Expression]: 4902 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4903 4904 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4905 return self._parse_tokens(self._parse_range, self.COMPARISON) 4906 4907 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4908 this = this or self._parse_bitwise() 4909 negate = self._match(TokenType.NOT) 4910 4911 if self._match_set(self.RANGE_PARSERS): 4912 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4913 if not expression: 4914 return this 4915 4916 this = expression 4917 elif self._match(TokenType.ISNULL): 4918 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4919 4920 # Postgres supports ISNULL and NOTNULL for conditions. 
4921 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4922 if self._match(TokenType.NOTNULL): 4923 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4924 this = self.expression(exp.Not, this=this) 4925 4926 if negate: 4927 this = self._negate_range(this) 4928 4929 if self._match(TokenType.IS): 4930 this = self._parse_is(this) 4931 4932 return this 4933 4934 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4935 if not this: 4936 return this 4937 4938 return self.expression(exp.Not, this=this) 4939 4940 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4941 index = self._index - 1 4942 negate = self._match(TokenType.NOT) 4943 4944 if self._match_text_seq("DISTINCT", "FROM"): 4945 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4946 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4947 4948 if self._match(TokenType.JSON): 4949 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4950 4951 if self._match_text_seq("WITH"): 4952 _with = True 4953 elif self._match_text_seq("WITHOUT"): 4954 _with = False 4955 else: 4956 _with = None 4957 4958 unique = self._match(TokenType.UNIQUE) 4959 self._match_text_seq("KEYS") 4960 expression: t.Optional[exp.Expression] = self.expression( 4961 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4962 ) 4963 else: 4964 expression = self._parse_primary() or self._parse_null() 4965 if not expression: 4966 self._retreat(index) 4967 return None 4968 4969 this = self.expression(exp.Is, this=this, expression=expression) 4970 return self.expression(exp.Not, this=this) if negate else this 4971 4972 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4973 unnest = self._parse_unnest(with_alias=False) 4974 if unnest: 4975 this = self.expression(exp.In, this=this, unnest=unnest) 4976 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4977 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4978 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4979 4980 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4981 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4982 else: 4983 this = self.expression(exp.In, this=this, expressions=expressions) 4984 4985 if matched_l_paren: 4986 self._match_r_paren(this) 4987 elif not self._match(TokenType.R_BRACKET, expression=this): 4988 self.raise_error("Expecting ]") 4989 else: 4990 this = self.expression(exp.In, this=this, field=self._parse_column()) 4991 4992 return this 4993 4994 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4995 symmetric = None 4996 if self._match_text_seq("SYMMETRIC"): 4997 symmetric = True 4998 elif self._match_text_seq("ASYMMETRIC"): 4999 symmetric = False 5000 5001 low = self._parse_bitwise() 5002 self._match(TokenType.AND) 5003 high = self._parse_bitwise() 5004 5005 return self.expression( 5006 exp.Between, 5007 this=this, 5008 low=low, 5009 high=high, 5010 symmetric=symmetric, 5011 ) 5012 5013 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5014 if not self._match(TokenType.ESCAPE): 5015 return this 5016 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5017 5018 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5019 index = self._index 5020 5021 if not 
self._match(TokenType.INTERVAL) and match_interval: 5022 return None 5023 5024 if self._match(TokenType.STRING, advance=False): 5025 this = self._parse_primary() 5026 else: 5027 this = self._parse_term() 5028 5029 if not this or ( 5030 isinstance(this, exp.Column) 5031 and not this.table 5032 and not this.this.quoted 5033 and this.name.upper() == "IS" 5034 ): 5035 self._retreat(index) 5036 return None 5037 5038 unit = self._parse_function() or ( 5039 not self._match(TokenType.ALIAS, advance=False) 5040 and self._parse_var(any_token=True, upper=True) 5041 ) 5042 5043 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5044 # each INTERVAL expression into this canonical form so it's easy to transpile 5045 if this and this.is_number: 5046 this = exp.Literal.string(this.to_py()) 5047 elif this and this.is_string: 5048 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5049 if parts and unit: 5050 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5051 unit = None 5052 self._retreat(self._index - 1) 5053 5054 if len(parts) == 1: 5055 this = exp.Literal.string(parts[0][0]) 5056 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5057 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5058 unit = self.expression( 5059 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5060 ) 5061 5062 interval = self.expression(exp.Interval, this=this, unit=unit) 5063 5064 index = self._index 5065 self._match(TokenType.PLUS) 5066 5067 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5068 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5069 return self.expression( 5070 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5071 ) 5072 5073 self._retreat(index) 5074 return interval 5075 5076 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5077 this = self._parse_term() 5078 5079 while True: 5080 if self._match_set(self.BITWISE): 5081 this = self.expression( 5082 self.BITWISE[self._prev.token_type], 5083 this=this, 5084 expression=self._parse_term(), 5085 ) 5086 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5087 this = self.expression( 5088 exp.DPipe, 5089 this=this, 5090 expression=self._parse_term(), 5091 safe=not self.dialect.STRICT_STRING_CONCAT, 5092 ) 5093 elif self._match(TokenType.DQMARK): 5094 this = self.expression( 5095 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5096 ) 5097 elif self._match_pair(TokenType.LT, TokenType.LT): 5098 this = self.expression( 5099 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5100 ) 5101 elif self._match_pair(TokenType.GT, TokenType.GT): 5102 this = self.expression( 5103 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5104 ) 5105 else: 5106 break 5107 5108 return this 5109 5110 def _parse_term(self) -> t.Optional[exp.Expression]: 5111 this = self._parse_factor() 5112 5113 while self._match_set(self.TERM): 5114 klass = self.TERM[self._prev.token_type] 5115 comments = self._prev_comments 5116 expression = self._parse_factor() 5117 5118 this = self.expression(klass, this=this, comments=comments, expression=expression) 5119 5120 if isinstance(this, exp.Collate): 5121 expr = this.expression 5122 5123 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5124 # fallback to Identifier / Var 5125 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5126 ident = expr.this 5127 if 
isinstance(ident, exp.Identifier): 5128 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5129 5130 return this 5131 5132 def _parse_factor(self) -> t.Optional[exp.Expression]: 5133 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5134 this = parse_method() 5135 5136 while self._match_set(self.FACTOR): 5137 klass = self.FACTOR[self._prev.token_type] 5138 comments = self._prev_comments 5139 expression = parse_method() 5140 5141 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5142 self._retreat(self._index - 1) 5143 return this 5144 5145 this = self.expression(klass, this=this, comments=comments, expression=expression) 5146 5147 if isinstance(this, exp.Div): 5148 this.args["typed"] = self.dialect.TYPED_DIVISION 5149 this.args["safe"] = self.dialect.SAFE_DIVISION 5150 5151 return this 5152 5153 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5154 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5155 5156 def _parse_unary(self) -> t.Optional[exp.Expression]: 5157 if self._match_set(self.UNARY_PARSERS): 5158 return self.UNARY_PARSERS[self._prev.token_type](self) 5159 return self._parse_at_time_zone(self._parse_type()) 5160 5161 def _parse_type( 5162 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5163 ) -> t.Optional[exp.Expression]: 5164 interval = parse_interval and self._parse_interval() 5165 if interval: 5166 return interval 5167 5168 index = self._index 5169 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5170 5171 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5172 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5173 if isinstance(data_type, exp.Cast): 5174 # This constructor can contain ops directly after it, for instance struct unnesting: 5175 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5176 return self._parse_column_ops(data_type) 5177 5178 if data_type: 5179 index2 = self._index 5180 this = self._parse_primary() 5181 5182 if isinstance(this, exp.Literal): 5183 literal = this.name 5184 this = self._parse_column_ops(this) 5185 5186 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5187 if parser: 5188 return parser(self, this, data_type) 5189 5190 if ( 5191 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5192 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5193 and TIME_ZONE_RE.search(literal) 5194 ): 5195 data_type = exp.DataType.build("TIMESTAMPTZ") 5196 5197 return self.expression(exp.Cast, this=this, to=data_type) 5198 5199 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5200 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5201 # 5202 # If the index difference here is greater than 1, that means the parser itself must have 5203 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5204 # 5205 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5206 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5207 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5208 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5209 # 5210 # In these cases, we don't really want to return the converted type, but instead retreat 5211 # and try to parse a Column or Identifier in the section below. 
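        # --- Editor's recap, not part of the upstream sqlglot source: with an explicit
        # "DECIMAL(38, 0)" in the input, _parse_types consumes "(38, 0)" itself, so
        # index2 - index > 1 and the explicitly written DataType is kept below; with a
        # bare "DECIMAL" whose expressions were filled in by a TYPE_CONVERTERS callable,
        # the difference stays at 1 and the parser retreats to re-parse a Column or
        # Identifier instead.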
5212 if data_type.expressions and index2 - index > 1: 5213 self._retreat(index2) 5214 return self._parse_column_ops(data_type) 5215 5216 self._retreat(index) 5217 5218 if fallback_to_identifier: 5219 return self._parse_id_var() 5220 5221 this = self._parse_column() 5222 return this and self._parse_column_ops(this) 5223 5224 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5225 this = self._parse_type() 5226 if not this: 5227 return None 5228 5229 if isinstance(this, exp.Column) and not this.table: 5230 this = exp.var(this.name.upper()) 5231 5232 return self.expression( 5233 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5234 ) 5235 5236 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5237 type_name = identifier.name 5238 5239 while self._match(TokenType.DOT): 5240 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5241 5242 return exp.DataType.build(type_name, udt=True) 5243 5244 def _parse_types( 5245 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5246 ) -> t.Optional[exp.Expression]: 5247 index = self._index 5248 5249 this: t.Optional[exp.Expression] = None 5250 prefix = self._match_text_seq("SYSUDTLIB", ".") 5251 5252 if not self._match_set(self.TYPE_TOKENS): 5253 identifier = allow_identifiers and self._parse_id_var( 5254 any_token=False, tokens=(TokenType.VAR,) 5255 ) 5256 if isinstance(identifier, exp.Identifier): 5257 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5258 5259 if len(tokens) != 1: 5260 self.raise_error("Unexpected identifier", self._prev) 5261 5262 if tokens[0].token_type in self.TYPE_TOKENS: 5263 self._prev = tokens[0] 5264 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5265 this = self._parse_user_defined_type(identifier) 5266 else: 5267 self._retreat(self._index - 1) 5268 return None 5269 else: 5270 return None 5271 5272 type_token = self._prev.token_type 5273 5274 if type_token == TokenType.PSEUDO_TYPE: 5275 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5276 5277 if type_token == TokenType.OBJECT_IDENTIFIER: 5278 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5279 5280 # https://materialize.com/docs/sql/types/map/ 5281 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5282 key_type = self._parse_types( 5283 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5284 ) 5285 if not self._match(TokenType.FARROW): 5286 self._retreat(index) 5287 return None 5288 5289 value_type = self._parse_types( 5290 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5291 ) 5292 if not self._match(TokenType.R_BRACKET): 5293 self._retreat(index) 5294 return None 5295 5296 return exp.DataType( 5297 this=exp.DataType.Type.MAP, 5298 expressions=[key_type, value_type], 5299 nested=True, 5300 prefix=prefix, 5301 ) 5302 5303 nested = type_token in self.NESTED_TYPE_TOKENS 5304 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5305 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5306 expressions = None 5307 maybe_func = False 5308 5309 if self._match(TokenType.L_PAREN): 5310 if is_struct: 5311 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5312 elif nested: 5313 expressions = self._parse_csv( 5314 lambda: self._parse_types( 5315 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5316 ) 5317 ) 5318 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5319 this = expressions[0] 5320 this.set("nullable", True) 5321 self._match_r_paren() 5322 return this 5323 elif type_token in self.ENUM_TYPE_TOKENS: 5324 expressions = self._parse_csv(self._parse_equality) 5325 elif is_aggregate: 5326 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5327 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5328 ) 5329 if not func_or_ident: 5330 return None 5331 expressions = [func_or_ident] 5332 if self._match(TokenType.COMMA): 5333 expressions.extend( 5334 self._parse_csv( 5335 lambda: self._parse_types( 5336 check_func=check_func, 5337 schema=schema, 5338 allow_identifiers=allow_identifiers, 5339 ) 5340 ) 5341 ) 5342 else: 5343 expressions = self._parse_csv(self._parse_type_size) 5344 5345 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5346 if type_token == TokenType.VECTOR and len(expressions) == 2: 5347 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5348 5349 if not expressions or not self._match(TokenType.R_PAREN): 5350 self._retreat(index) 5351 return None 5352 5353 maybe_func = True 5354 5355 values: t.Optional[t.List[exp.Expression]] = None 5356 5357 if nested and self._match(TokenType.LT): 5358 if is_struct: 5359 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5360 else: 5361 expressions = self._parse_csv( 5362 lambda: self._parse_types( 5363 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5364 ) 5365 ) 5366 5367 if not self._match(TokenType.GT): 5368 self.raise_error("Expecting >") 5369 5370 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5371 values = self._parse_csv(self._parse_assignment) 5372 if not values and is_struct: 5373 values = None 5374 self._retreat(self._index - 1) 5375 else: 5376 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5377 5378 if type_token in self.TIMESTAMPS: 5379 if self._match_text_seq("WITH", "TIME", "ZONE"): 5380 maybe_func = False 5381 tz_type = ( 5382 exp.DataType.Type.TIMETZ 5383 if type_token in self.TIMES 5384 else exp.DataType.Type.TIMESTAMPTZ 5385 ) 5386 this = exp.DataType(this=tz_type, expressions=expressions) 5387 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5388 maybe_func = False 5389 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5390 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5391 maybe_func = False 5392 elif type_token == TokenType.INTERVAL: 5393 unit = self._parse_var(upper=True) 5394 if unit: 5395 if self._match_text_seq("TO"): 5396 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5397 5398 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5399 else: 5400 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5401 elif type_token == TokenType.VOID: 5402 this = exp.DataType(this=exp.DataType.Type.NULL) 5403 5404 if maybe_func and check_func: 5405 index2 = self._index 5406 peek = self._parse_string() 5407 5408 if not peek: 5409 self._retreat(index) 5410 return None 5411 5412 self._retreat(index2) 5413 5414 if not this: 5415 if self._match_text_seq("UNSIGNED"): 5416 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5417 if not unsigned_type_token: 5418 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5419 5420 type_token = unsigned_type_token or type_token 5421 5422 this = exp.DataType( 5423 this=exp.DataType.Type[type_token.value], 5424 expressions=expressions, 
5425 nested=nested, 5426 prefix=prefix, 5427 ) 5428 5429 # Empty arrays/structs are allowed 5430 if values is not None: 5431 cls = exp.Struct if is_struct else exp.Array 5432 this = exp.cast(cls(expressions=values), this, copy=False) 5433 5434 elif expressions: 5435 this.set("expressions", expressions) 5436 5437 # https://materialize.com/docs/sql/types/list/#type-name 5438 while self._match(TokenType.LIST): 5439 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5440 5441 index = self._index 5442 5443 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5444 matched_array = self._match(TokenType.ARRAY) 5445 5446 while self._curr: 5447 datatype_token = self._prev.token_type 5448 matched_l_bracket = self._match(TokenType.L_BRACKET) 5449 5450 if (not matched_l_bracket and not matched_array) or ( 5451 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5452 ): 5453 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5454 # not to be confused with the fixed size array parsing 5455 break 5456 5457 matched_array = False 5458 values = self._parse_csv(self._parse_assignment) or None 5459 if ( 5460 values 5461 and not schema 5462 and ( 5463 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5464 ) 5465 ): 5466 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5467 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5468 self._retreat(index) 5469 break 5470 5471 this = exp.DataType( 5472 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5473 ) 5474 self._match(TokenType.R_BRACKET) 5475 5476 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5477 converter = self.TYPE_CONVERTERS.get(this.this) 5478 if converter: 5479 this = converter(t.cast(exp.DataType, this)) 5480 5481 return this 5482 5483 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5484 index = self._index 5485 5486 if ( 5487 self._curr 5488 and self._next 5489 and self._curr.token_type in self.TYPE_TOKENS 5490 and self._next.token_type in self.TYPE_TOKENS 5491 ): 5492 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5493 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5494 this = self._parse_id_var() 5495 else: 5496 this = ( 5497 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5498 or self._parse_id_var() 5499 ) 5500 5501 self._match(TokenType.COLON) 5502 5503 if ( 5504 type_required 5505 and not isinstance(this, exp.DataType) 5506 and not self._match_set(self.TYPE_TOKENS, advance=False) 5507 ): 5508 self._retreat(index) 5509 return self._parse_types() 5510 5511 return self._parse_column_def(this) 5512 5513 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5514 if not self._match_text_seq("AT", "TIME", "ZONE"): 5515 return this 5516 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5517 5518 def _parse_column(self) -> t.Optional[exp.Expression]: 5519 this = self._parse_column_reference() 5520 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5521 5522 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5523 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5524 5525 return column 5526 5527 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5528 this = self._parse_field() 5529 if ( 5530 not this 5531 and self._match(TokenType.VALUES, advance=False) 5532 and self.VALUES_FOLLOWED_BY_PAREN 5533 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5534 ): 5535 this = self._parse_id_var() 5536 5537 if isinstance(this, exp.Identifier): 5538 # We bubble up comments from the Identifier to the Column 5539 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5540 5541 return this 5542 5543 def _parse_colon_as_variant_extract( 5544 self, this: t.Optional[exp.Expression] 5545 ) -> t.Optional[exp.Expression]: 5546 casts = [] 5547 json_path = [] 5548 escape = None 5549 5550 while self._match(TokenType.COLON): 5551 start_index = self._index 5552 5553 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5554 path = self._parse_column_ops( 5555 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5556 ) 5557 5558 # The cast :: operator has a lower precedence than the extraction operator :, so 5559 # we rearrange the AST appropriately to avoid casting the JSON path 5560 while isinstance(path, exp.Cast): 5561 casts.append(path.to) 5562 path = path.this 5563 5564 if casts: 5565 dcolon_offset = next( 5566 i 5567 for i, t in enumerate(self._tokens[start_index:]) 5568 if t.token_type == TokenType.DCOLON 5569 ) 5570 end_token = self._tokens[start_index + dcolon_offset - 1] 5571 else: 5572 end_token = self._prev 5573 5574 if path: 5575 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5576 # it'll roundtrip to a string literal in GET_PATH 5577 if isinstance(path, exp.Identifier) and path.quoted: 5578 escape = True 5579 5580 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5581 5582 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5583 # Databricks transforms it back to the colon/dot notation 5584 if json_path: 5585 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5586 5587 if json_path_expr: 5588 json_path_expr.set("escape", escape) 5589 5590 this = self.expression( 5591 exp.JSONExtract, 5592 this=this, 5593 expression=json_path_expr, 5594 variant_extract=True, 5595 ) 5596 5597 while casts: 5598 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5599 5600 return this 5601 5602 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5603 return self._parse_types() 5604 5605 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5606 this = self._parse_bracket(this) 5607 5608 while self._match_set(self.COLUMN_OPERATORS): 5609 op_token = self._prev.token_type 5610 op = self.COLUMN_OPERATORS.get(op_token) 5611 5612 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5613 field = self._parse_dcolon() 5614 if not field: 5615 self.raise_error("Expected type") 5616 elif op and self._curr: 5617 field = self._parse_column_reference() or self._parse_bracket() 5618 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5619 field = self._parse_column_ops(field) 5620 else: 5621 field = self._parse_field(any_token=True, anonymous_func=True) 5622 5623 # Function calls can be qualified, e.g., x.y.FOO() 5624 # This converts the final AST to a series of Dots leading to the function call 5625 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5626 if isinstance(field, (exp.Func, exp.Window)) and this: 5627 this = this.transform( 5628 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5629 ) 5630 5631 if op: 5632 this = op(self, this, field) 5633 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5634 this = self.expression( 5635 exp.Column, 5636 comments=this.comments, 5637 this=field, 5638 table=this.this, 5639 db=this.args.get("table"), 5640 catalog=this.args.get("db"), 5641 ) 5642 elif isinstance(field, exp.Window): 5643 # Move the exp.Dot's to the window's function 5644 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5645 field.set("this", window_func) 5646 this = field 5647 else: 5648 this = self.expression(exp.Dot, this=this, expression=field) 5649 5650 if field and field.comments: 5651 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5652 5653 this = self._parse_bracket(this) 5654 5655 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5656 5657 def _parse_paren(self) -> t.Optional[exp.Expression]: 5658 if not self._match(TokenType.L_PAREN): 5659 return None 5660 5661 comments = self._prev_comments 5662 query = self._parse_select() 5663 5664 if query: 5665 expressions = [query] 5666 else: 5667 expressions = self._parse_expressions() 5668 5669 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5670 5671 if not this and self._match(TokenType.R_PAREN, advance=False): 5672 this = self.expression(exp.Tuple) 5673 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5674 this = 
self._parse_subquery(this=this, parse_alias=False) 5675 elif isinstance(this, exp.Subquery): 5676 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5677 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5678 this = self.expression(exp.Tuple, expressions=expressions) 5679 else: 5680 this = self.expression(exp.Paren, this=this) 5681 5682 if this: 5683 this.add_comments(comments) 5684 5685 self._match_r_paren(expression=this) 5686 return this 5687 5688 def _parse_primary(self) -> t.Optional[exp.Expression]: 5689 if self._match_set(self.PRIMARY_PARSERS): 5690 token_type = self._prev.token_type 5691 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5692 5693 if token_type == TokenType.STRING: 5694 expressions = [primary] 5695 while self._match(TokenType.STRING): 5696 expressions.append(exp.Literal.string(self._prev.text)) 5697 5698 if len(expressions) > 1: 5699 return self.expression(exp.Concat, expressions=expressions) 5700 5701 return primary 5702 5703 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5704 return exp.Literal.number(f"0.{self._prev.text}") 5705 5706 return self._parse_paren() 5707 5708 def _parse_field( 5709 self, 5710 any_token: bool = False, 5711 tokens: t.Optional[t.Collection[TokenType]] = None, 5712 anonymous_func: bool = False, 5713 ) -> t.Optional[exp.Expression]: 5714 if anonymous_func: 5715 field = ( 5716 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5717 or self._parse_primary() 5718 ) 5719 else: 5720 field = self._parse_primary() or self._parse_function( 5721 anonymous=anonymous_func, any_token=any_token 5722 ) 5723 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5724 5725 def _parse_function( 5726 self, 5727 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5728 anonymous: bool = False, 5729 optional_parens: bool = True, 5730 any_token: bool = False, 5731 ) -> t.Optional[exp.Expression]: 5732 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5733 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5734 fn_syntax = False 5735 if ( 5736 self._match(TokenType.L_BRACE, advance=False) 5737 and self._next 5738 and self._next.text.upper() == "FN" 5739 ): 5740 self._advance(2) 5741 fn_syntax = True 5742 5743 func = self._parse_function_call( 5744 functions=functions, 5745 anonymous=anonymous, 5746 optional_parens=optional_parens, 5747 any_token=any_token, 5748 ) 5749 5750 if fn_syntax: 5751 self._match(TokenType.R_BRACE) 5752 5753 return func 5754 5755 def _parse_function_call( 5756 self, 5757 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5758 anonymous: bool = False, 5759 optional_parens: bool = True, 5760 any_token: bool = False, 5761 ) -> t.Optional[exp.Expression]: 5762 if not self._curr: 5763 return None 5764 5765 comments = self._curr.comments 5766 token = self._curr 5767 token_type = self._curr.token_type 5768 this = self._curr.text 5769 upper = this.upper() 5770 5771 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5772 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5773 self._advance() 5774 return self._parse_window(parser(self)) 5775 5776 if not self._next or self._next.token_type != TokenType.L_PAREN: 5777 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5778 self._advance() 5779 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5780 5781 return None 5782 5783 if any_token: 5784 if token_type in self.RESERVED_TOKENS: 
5785 return None 5786 elif token_type not in self.FUNC_TOKENS: 5787 return None 5788 5789 self._advance(2) 5790 5791 parser = self.FUNCTION_PARSERS.get(upper) 5792 if parser and not anonymous: 5793 this = parser(self) 5794 else: 5795 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5796 5797 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5798 this = self.expression( 5799 subquery_predicate, comments=comments, this=self._parse_select() 5800 ) 5801 self._match_r_paren() 5802 return this 5803 5804 if functions is None: 5805 functions = self.FUNCTIONS 5806 5807 function = functions.get(upper) 5808 known_function = function and not anonymous 5809 5810 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5811 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5812 5813 post_func_comments = self._curr and self._curr.comments 5814 if known_function and post_func_comments: 5815 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5816 # call we'll construct it as exp.Anonymous, even if it's "known" 5817 if any( 5818 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5819 for comment in post_func_comments 5820 ): 5821 known_function = False 5822 5823 if alias and known_function: 5824 args = self._kv_to_prop_eq(args) 5825 5826 if known_function: 5827 func_builder = t.cast(t.Callable, function) 5828 5829 if "dialect" in func_builder.__code__.co_varnames: 5830 func = func_builder(args, dialect=self.dialect) 5831 else: 5832 func = func_builder(args) 5833 5834 func = self.validate_expression(func, args) 5835 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5836 func.meta["name"] = this 5837 5838 this = func 5839 else: 5840 if token_type == TokenType.IDENTIFIER: 5841 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5842 5843 this = self.expression(exp.Anonymous, this=this, expressions=args) 5844 this = this.update_positions(token) 5845 5846 if isinstance(this, exp.Expression): 5847 this.add_comments(comments) 5848 5849 self._match_r_paren(this) 5850 return self._parse_window(this) 5851 5852 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5853 return expression 5854 5855 def _kv_to_prop_eq( 5856 self, expressions: t.List[exp.Expression], parse_map: bool = False 5857 ) -> t.List[exp.Expression]: 5858 transformed = [] 5859 5860 for index, e in enumerate(expressions): 5861 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5862 if isinstance(e, exp.Alias): 5863 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5864 5865 if not isinstance(e, exp.PropertyEQ): 5866 e = self.expression( 5867 exp.PropertyEQ, 5868 this=e.this if parse_map else exp.to_identifier(e.this.name), 5869 expression=e.expression, 5870 ) 5871 5872 if isinstance(e.this, exp.Column): 5873 e.this.replace(e.this.this) 5874 else: 5875 e = self._to_prop_eq(e, index) 5876 5877 transformed.append(e) 5878 5879 return transformed 5880 5881 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5882 return self._parse_statement() 5883 5884 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5885 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5886 5887 def _parse_user_defined_function( 5888 self, kind: t.Optional[TokenType] = None 5889 ) -> t.Optional[exp.Expression]: 5890 this = self._parse_table_parts(schema=True) 5891 5892 if not self._match(TokenType.L_PAREN): 5893 return this 5894 5895 
expressions = self._parse_csv(self._parse_function_parameter) 5896 self._match_r_paren() 5897 return self.expression( 5898 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5899 ) 5900 5901 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5902 literal = self._parse_primary() 5903 if literal: 5904 return self.expression(exp.Introducer, this=token.text, expression=literal) 5905 5906 return self._identifier_expression(token) 5907 5908 def _parse_session_parameter(self) -> exp.SessionParameter: 5909 kind = None 5910 this = self._parse_id_var() or self._parse_primary() 5911 5912 if this and self._match(TokenType.DOT): 5913 kind = this.name 5914 this = self._parse_var() or self._parse_primary() 5915 5916 return self.expression(exp.SessionParameter, this=this, kind=kind) 5917 5918 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5919 return self._parse_id_var() 5920 5921 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5922 index = self._index 5923 5924 if self._match(TokenType.L_PAREN): 5925 expressions = t.cast( 5926 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5927 ) 5928 5929 if not self._match(TokenType.R_PAREN): 5930 self._retreat(index) 5931 else: 5932 expressions = [self._parse_lambda_arg()] 5933 5934 if self._match_set(self.LAMBDAS): 5935 return self.LAMBDAS[self._prev.token_type](self, expressions) 5936 5937 self._retreat(index) 5938 5939 this: t.Optional[exp.Expression] 5940 5941 if self._match(TokenType.DISTINCT): 5942 this = self.expression( 5943 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5944 ) 5945 else: 5946 this = self._parse_select_or_expression(alias=alias) 5947 5948 return self._parse_limit( 5949 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5950 ) 5951 5952 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5953 index = self._index 5954 if not self._match(TokenType.L_PAREN): 5955 return this 5956 5957 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5958 # expr can be of both types 5959 if self._match_set(self.SELECT_START_TOKENS): 5960 self._retreat(index) 5961 return this 5962 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5963 self._match_r_paren() 5964 return self.expression(exp.Schema, this=this, expressions=args) 5965 5966 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5967 return self._parse_column_def(self._parse_field(any_token=True)) 5968 5969 def _parse_column_def( 5970 self, this: t.Optional[exp.Expression], computed_column: bool = True 5971 ) -> t.Optional[exp.Expression]: 5972 # column defs are not really columns, they're identifiers 5973 if isinstance(this, exp.Column): 5974 this = this.this 5975 5976 if not computed_column: 5977 self._match(TokenType.ALIAS) 5978 5979 kind = self._parse_types(schema=True) 5980 5981 if self._match_text_seq("FOR", "ORDINALITY"): 5982 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5983 5984 constraints: t.List[exp.Expression] = [] 5985 5986 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5987 ("ALIAS", "MATERIALIZED") 5988 ): 5989 persisted = self._prev.text.upper() == "MATERIALIZED" 5990 constraint_kind = exp.ComputedColumnConstraint( 5991 this=self._parse_assignment(), 5992 persisted=persisted or self._match_text_seq("PERSISTED"), 5993 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5994 ) 5995 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5996 elif ( 5997 kind 5998 and self._match(TokenType.ALIAS, advance=False) 5999 and ( 6000 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6001 or (self._next and self._next.token_type == TokenType.L_PAREN) 6002 ) 6003 ): 6004 self._advance() 6005 constraints.append( 6006 self.expression( 6007 exp.ColumnConstraint, 6008 kind=exp.ComputedColumnConstraint( 6009 this=self._parse_disjunction(), 6010 persisted=self._match_texts(("STORED", "VIRTUAL")) 6011 and self._prev.text.upper() == "STORED", 6012 ), 6013 ) 6014 ) 6015 6016 while True: 6017 constraint = self._parse_column_constraint() 6018 if not constraint: 6019 break 6020 constraints.append(constraint) 6021 6022 if not kind and not constraints: 6023 return this 6024 6025 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6026 6027 def _parse_auto_increment( 6028 self, 6029 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6030 start = None 6031 increment = None 6032 order = None 6033 6034 if self._match(TokenType.L_PAREN, advance=False): 6035 args = self._parse_wrapped_csv(self._parse_bitwise) 6036 start = seq_get(args, 0) 6037 increment = seq_get(args, 1) 6038 elif self._match_text_seq("START"): 6039 start = self._parse_bitwise() 6040 self._match_text_seq("INCREMENT") 6041 increment = self._parse_bitwise() 6042 if self._match_text_seq("ORDER"): 6043 order = True 6044 elif self._match_text_seq("NOORDER"): 6045 order = False 6046 6047 if start and increment: 6048 return exp.GeneratedAsIdentityColumnConstraint( 6049 start=start, increment=increment, this=False, order=order 6050 ) 6051 6052 return exp.AutoIncrementColumnConstraint() 6053 6054 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6055 if not self._match_text_seq("REFRESH"): 6056 self._retreat(self._index - 1) 6057 return None 6058 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6059 6060 def _parse_compress(self) -> exp.CompressColumnConstraint: 6061 if 
self._match(TokenType.L_PAREN, advance=False): 6062 return self.expression( 6063 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6064 ) 6065 6066 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6067 6068 def _parse_generated_as_identity( 6069 self, 6070 ) -> ( 6071 exp.GeneratedAsIdentityColumnConstraint 6072 | exp.ComputedColumnConstraint 6073 | exp.GeneratedAsRowColumnConstraint 6074 ): 6075 if self._match_text_seq("BY", "DEFAULT"): 6076 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6077 this = self.expression( 6078 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6079 ) 6080 else: 6081 self._match_text_seq("ALWAYS") 6082 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6083 6084 self._match(TokenType.ALIAS) 6085 6086 if self._match_text_seq("ROW"): 6087 start = self._match_text_seq("START") 6088 if not start: 6089 self._match(TokenType.END) 6090 hidden = self._match_text_seq("HIDDEN") 6091 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6092 6093 identity = self._match_text_seq("IDENTITY") 6094 6095 if self._match(TokenType.L_PAREN): 6096 if self._match(TokenType.START_WITH): 6097 this.set("start", self._parse_bitwise()) 6098 if self._match_text_seq("INCREMENT", "BY"): 6099 this.set("increment", self._parse_bitwise()) 6100 if self._match_text_seq("MINVALUE"): 6101 this.set("minvalue", self._parse_bitwise()) 6102 if self._match_text_seq("MAXVALUE"): 6103 this.set("maxvalue", self._parse_bitwise()) 6104 6105 if self._match_text_seq("CYCLE"): 6106 this.set("cycle", True) 6107 elif self._match_text_seq("NO", "CYCLE"): 6108 this.set("cycle", False) 6109 6110 if not identity: 6111 this.set("expression", self._parse_range()) 6112 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6113 args = self._parse_csv(self._parse_bitwise) 6114 this.set("start", seq_get(args, 0)) 6115 this.set("increment", seq_get(args, 1)) 6116 6117 self._match_r_paren() 6118 6119 return this 6120 6121 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6122 self._match_text_seq("LENGTH") 6123 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6124 6125 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6126 if self._match_text_seq("NULL"): 6127 return self.expression(exp.NotNullColumnConstraint) 6128 if self._match_text_seq("CASESPECIFIC"): 6129 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6130 if self._match_text_seq("FOR", "REPLICATION"): 6131 return self.expression(exp.NotForReplicationColumnConstraint) 6132 6133 # Unconsume the `NOT` token 6134 self._retreat(self._index - 1) 6135 return None 6136 6137 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6138 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6139 6140 procedure_option_follows = ( 6141 self._match(TokenType.WITH, advance=False) 6142 and self._next 6143 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6144 ) 6145 6146 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6147 return self.expression( 6148 exp.ColumnConstraint, 6149 this=this, 6150 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6151 ) 6152 6153 return this 6154 6155 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6156 if not self._match(TokenType.CONSTRAINT): 6157 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6158 6159 return self.expression( 6160 exp.Constraint, 6161 this=self._parse_id_var(), 6162 expressions=self._parse_unnamed_constraints(), 6163 ) 6164 6165 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6166 constraints = [] 6167 while True: 6168 constraint = self._parse_unnamed_constraint() or self._parse_function() 6169 if not constraint: 6170 break 6171 constraints.append(constraint) 6172 6173 return constraints 6174 6175 def _parse_unnamed_constraint( 6176 self, constraints: t.Optional[t.Collection[str]] = None 6177 ) -> t.Optional[exp.Expression]: 6178 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6179 constraints or self.CONSTRAINT_PARSERS 6180 ): 6181 return None 6182 6183 constraint = self._prev.text.upper() 6184 if constraint not in self.CONSTRAINT_PARSERS: 6185 self.raise_error(f"No parser found for schema constraint {constraint}.") 6186 6187 return self.CONSTRAINT_PARSERS[constraint](self) 6188 6189 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6190 return self._parse_id_var(any_token=False) 6191 6192 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6193 self._match_text_seq("KEY") 6194 return self.expression( 6195 exp.UniqueColumnConstraint, 6196 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6197 this=self._parse_schema(self._parse_unique_key()), 6198 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6199 on_conflict=self._parse_on_conflict(), 6200 options=self._parse_key_constraint_options(), 6201 ) 6202 6203 def _parse_key_constraint_options(self) -> t.List[str]: 6204 options = [] 6205 while True: 6206 if not self._curr: 6207 break 6208 6209 if self._match(TokenType.ON): 6210 action = None 6211 on = self._advance_any() and self._prev.text 6212 6213 if self._match_text_seq("NO", "ACTION"): 6214 action = "NO ACTION" 6215 elif self._match_text_seq("CASCADE"): 6216 action = "CASCADE" 6217 elif self._match_text_seq("RESTRICT"): 6218 action = "RESTRICT" 6219 elif self._match_pair(TokenType.SET, TokenType.NULL): 6220 action = "SET NULL" 6221 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6222 action = "SET DEFAULT" 6223 else: 6224 self.raise_error("Invalid key constraint") 6225 6226 options.append(f"ON {on} {action}") 6227 else: 6228 var = self._parse_var_from_options( 6229 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6230 ) 6231 if not var: 6232 break 6233 options.append(var.name) 6234 6235 return options 6236 6237 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6238 if match and not self._match(TokenType.REFERENCES): 6239 return None 6240 6241 expressions = None 6242 this = self._parse_table(schema=True) 6243 options = self._parse_key_constraint_options() 6244 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6245 6246 def _parse_foreign_key(self) -> exp.ForeignKey: 6247 expressions = ( 6248 self._parse_wrapped_id_vars() 6249 if not self._match(TokenType.REFERENCES, advance=False) 6250 else None 6251 ) 6252 reference = self._parse_references() 6253 on_options = {} 6254 6255 while self._match(TokenType.ON): 6256 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6257 self.raise_error("Expected DELETE or UPDATE") 6258 6259 kind = self._prev.text.lower() 6260 6261 if self._match_text_seq("NO", "ACTION"): 6262 action = "NO ACTION" 6263 elif self._match(TokenType.SET): 6264 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6265 action = "SET " + self._prev.text.upper() 6266 else: 6267 self._advance() 6268 action = self._prev.text.upper() 6269 6270 on_options[kind] = action 6271 6272 return self.expression( 6273 exp.ForeignKey, 6274 expressions=expressions, 6275 reference=reference, 6276 options=self._parse_key_constraint_options(), 6277 **on_options, # type: ignore 6278 ) 6279 6280 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6281 return self._parse_ordered() or self._parse_field() 6282 6283 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6284 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6285 self._retreat(self._index - 1) 6286 return None 6287 6288 id_vars = self._parse_wrapped_id_vars() 6289 return self.expression( 6290 exp.PeriodForSystemTimeConstraint, 6291 this=seq_get(id_vars, 0), 6292 expression=seq_get(id_vars, 1), 6293 ) 6294 6295 def _parse_primary_key( 6296 self, wrapped_optional: bool = False, in_props: bool = False 6297 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6298 desc = ( 6299 self._match_set((TokenType.ASC, TokenType.DESC)) 6300 and self._prev.token_type == TokenType.DESC 6301 ) 6302 6303 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6304 return self.expression( 6305 exp.PrimaryKeyColumnConstraint, 6306 desc=desc, 6307 options=self._parse_key_constraint_options(), 6308 ) 6309 6310 expressions = self._parse_wrapped_csv( 6311 self._parse_primary_key_part, optional=wrapped_optional 6312 ) 6313 6314 return self.expression( 6315 exp.PrimaryKey, 6316 expressions=expressions, 6317 include=self._parse_index_params(), 6318 options=self._parse_key_constraint_options(), 6319 ) 6320 6321 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6322 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6323 6324 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6325 """ 6326 Parses a datetime column in ODBC format. We parse the column into the corresponding 6327 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6328 same as we did for `DATE('yyyy-mm-dd')`. 
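Other prefixes defined in `ODBC_DATETIME_LITERALS` (such as `{t'hh:mm:ss'}` for times or `{ts'yyyy-mm-dd hh:mm:ss'}` for timestamps, if present in the mapping) are dispatched to their corresponding expression types in the same way.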
6329 6330 Reference: 6331 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6332 """ 6333 self._match(TokenType.VAR) 6334 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6335 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6336 if not self._match(TokenType.R_BRACE): 6337 self.raise_error("Expected }") 6338 return expression 6339 6340 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6341 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6342 return this 6343 6344 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6345 map_token = seq_get(self._tokens, self._index - 2) 6346 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6347 else: 6348 parse_map = False 6349 6350 bracket_kind = self._prev.token_type 6351 if ( 6352 bracket_kind == TokenType.L_BRACE 6353 and self._curr 6354 and self._curr.token_type == TokenType.VAR 6355 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6356 ): 6357 return self._parse_odbc_datetime_literal() 6358 6359 expressions = self._parse_csv( 6360 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6361 ) 6362 6363 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6364 self.raise_error("Expected ]") 6365 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6366 self.raise_error("Expected }") 6367 6368 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6369 if bracket_kind == TokenType.L_BRACE: 6370 this = self.expression( 6371 exp.Struct, 6372 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6373 ) 6374 elif not this: 6375 this = build_array_constructor( 6376 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6377 ) 6378 else: 6379 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6380 if constructor_type: 6381 return build_array_constructor( 6382 constructor_type, 6383 args=expressions, 6384 bracket_kind=bracket_kind, 6385 dialect=self.dialect, 6386 ) 6387 6388 expressions = apply_index_offset( 6389 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6390 ) 6391 this = self.expression( 6392 exp.Bracket, 6393 this=this, 6394 expressions=expressions, 6395 comments=this.pop_comments(), 6396 ) 6397 6398 self._add_comments(this) 6399 return self._parse_bracket(this) 6400 6401 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6402 if self._match(TokenType.COLON): 6403 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6404 return this 6405 6406 def _parse_case(self) -> t.Optional[exp.Expression]: 6407 ifs = [] 6408 default = None 6409 6410 comments = self._prev_comments 6411 expression = self._parse_assignment() 6412 6413 while self._match(TokenType.WHEN): 6414 this = self._parse_assignment() 6415 self._match(TokenType.THEN) 6416 then = self._parse_assignment() 6417 ifs.append(self.expression(exp.If, this=this, true=then)) 6418 6419 if self._match(TokenType.ELSE): 6420 default = self._parse_assignment() 6421 6422 if not self._match(TokenType.END): 6423 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6424 default = exp.column("interval") 6425 else: 6426 self.raise_error("Expected END after CASE", self._prev) 6427 6428 return self.expression( 6429 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6430 ) 6431 6432 def _parse_if(self) -> t.Optional[exp.Expression]: 6433 if self._match(TokenType.L_PAREN): 6434 args = self._parse_csv( 6435 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6436 ) 6437 this = self.validate_expression(exp.If.from_arg_list(args), args) 6438 self._match_r_paren() 6439 else: 6440 index = self._index - 1 6441 6442 if self.NO_PAREN_IF_COMMANDS and index == 0: 6443 return self._parse_as_command(self._prev) 6444 6445 condition = self._parse_assignment() 6446 6447 if not condition: 6448 self._retreat(index) 6449 return None 6450 6451 self._match(TokenType.THEN) 6452 true = self._parse_assignment() 6453 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6454 self._match(TokenType.END) 6455 this = self.expression(exp.If, this=condition, true=true, false=false) 6456 6457 return this 6458 6459 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6460 if not self._match_text_seq("VALUE", "FOR"): 6461 self._retreat(self._index - 1) 6462 return None 6463 6464 return self.expression( 6465 exp.NextValueFor, 6466 this=self._parse_column(), 6467 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6468 ) 6469 6470 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6471 this = self._parse_function() or self._parse_var_or_string(upper=True) 6472 6473 if self._match(TokenType.FROM): 6474 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6475 6476 if not self._match(TokenType.COMMA): 6477 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6478 6479 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6480 6481 def _parse_gap_fill(self) -> exp.GapFill: 6482 self._match(TokenType.TABLE) 6483 this = self._parse_table() 6484 6485 self._match(TokenType.COMMA) 6486 args = [this, *self._parse_csv(self._parse_lambda)] 6487 6488 gap_fill = exp.GapFill.from_arg_list(args) 6489 return self.validate_expression(gap_fill, args) 6490 6491 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6492 this = self._parse_assignment() 6493 6494 if not self._match(TokenType.ALIAS): 6495 if self._match(TokenType.COMMA): 6496 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6497 6498 self.raise_error("Expected AS after CAST") 6499 6500 fmt = None 6501 to = self._parse_types() 6502 6503 default = self._match(TokenType.DEFAULT) 6504 if default: 6505 default = self._parse_bitwise() 6506 self._match_text_seq("ON", "CONVERSION", "ERROR") 6507 6508 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6509 fmt_string = self._parse_string() 6510 fmt = self._parse_at_time_zone(fmt_string) 6511 6512 if not to: 6513 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6514 if to.this in exp.DataType.TEMPORAL_TYPES: 6515 this = self.expression( 6516 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6517 this=this, 6518 format=exp.Literal.string( 6519 format_time( 6520 fmt_string.this if fmt_string else "", 6521 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6522 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6523 ) 6524 ), 6525 safe=safe, 6526 ) 6527 6528 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6529 this.set("zone", fmt.args["zone"]) 6530 return this 6531 elif not to: 6532 self.raise_error("Expected TYPE after CAST") 6533 elif isinstance(to, exp.Identifier): 6534 to = exp.DataType.build(to.name, udt=True) 6535 
elif to.this == exp.DataType.Type.CHAR: 6536 if self._match(TokenType.CHARACTER_SET): 6537 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6538 6539 return self.expression( 6540 exp.Cast if strict else exp.TryCast, 6541 this=this, 6542 to=to, 6543 format=fmt, 6544 safe=safe, 6545 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6546 default=default, 6547 ) 6548 6549 def _parse_string_agg(self) -> exp.GroupConcat: 6550 if self._match(TokenType.DISTINCT): 6551 args: t.List[t.Optional[exp.Expression]] = [ 6552 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6553 ] 6554 if self._match(TokenType.COMMA): 6555 args.extend(self._parse_csv(self._parse_assignment)) 6556 else: 6557 args = self._parse_csv(self._parse_assignment) # type: ignore 6558 6559 if self._match_text_seq("ON", "OVERFLOW"): 6560 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6561 if self._match_text_seq("ERROR"): 6562 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6563 else: 6564 self._match_text_seq("TRUNCATE") 6565 on_overflow = self.expression( 6566 exp.OverflowTruncateBehavior, 6567 this=self._parse_string(), 6568 with_count=( 6569 self._match_text_seq("WITH", "COUNT") 6570 or not self._match_text_seq("WITHOUT", "COUNT") 6571 ), 6572 ) 6573 else: 6574 on_overflow = None 6575 6576 index = self._index 6577 if not self._match(TokenType.R_PAREN) and args: 6578 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6579 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6580 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6581 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6582 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6583 6584 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6585 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6586 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
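# For instance (illustrative), LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) is folded into GroupConcat(this=<x ORDER BY y>, separator=','), with the ORDER BY carried inside `this` rather than in a wrapping WithinGroup node.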
6587 if not self._match_text_seq("WITHIN", "GROUP"): 6588 self._retreat(index) 6589 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6590 6591 # The corresponding match_r_paren will be called in parse_function (caller) 6592 self._match_l_paren() 6593 6594 return self.expression( 6595 exp.GroupConcat, 6596 this=self._parse_order(this=seq_get(args, 0)), 6597 separator=seq_get(args, 1), 6598 on_overflow=on_overflow, 6599 ) 6600 6601 def _parse_convert( 6602 self, strict: bool, safe: t.Optional[bool] = None 6603 ) -> t.Optional[exp.Expression]: 6604 this = self._parse_bitwise() 6605 6606 if self._match(TokenType.USING): 6607 to: t.Optional[exp.Expression] = self.expression( 6608 exp.CharacterSet, this=self._parse_var() 6609 ) 6610 elif self._match(TokenType.COMMA): 6611 to = self._parse_types() 6612 else: 6613 to = None 6614 6615 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6616 6617 def _parse_xml_table(self) -> exp.XMLTable: 6618 namespaces = None 6619 passing = None 6620 columns = None 6621 6622 if self._match_text_seq("XMLNAMESPACES", "("): 6623 namespaces = self._parse_xml_namespace() 6624 self._match_text_seq(")", ",") 6625 6626 this = self._parse_string() 6627 6628 if self._match_text_seq("PASSING"): 6629 # The BY VALUE keywords are optional and are provided for semantic clarity 6630 self._match_text_seq("BY", "VALUE") 6631 passing = self._parse_csv(self._parse_column) 6632 6633 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6634 6635 if self._match_text_seq("COLUMNS"): 6636 columns = self._parse_csv(self._parse_field_def) 6637 6638 return self.expression( 6639 exp.XMLTable, 6640 this=this, 6641 namespaces=namespaces, 6642 passing=passing, 6643 columns=columns, 6644 by_ref=by_ref, 6645 ) 6646 6647 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6648 namespaces = [] 6649 6650 while True: 6651 if self._match(TokenType.DEFAULT): 6652 uri = self._parse_string() 6653 else: 6654 uri = self._parse_alias(self._parse_string()) 6655 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6656 if not self._match(TokenType.COMMA): 6657 break 6658 6659 return namespaces 6660 6661 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6662 args = self._parse_csv(self._parse_assignment) 6663 6664 if len(args) < 3: 6665 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6666 6667 return self.expression(exp.DecodeCase, expressions=args) 6668 6669 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6670 self._match_text_seq("KEY") 6671 key = self._parse_column() 6672 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6673 self._match_text_seq("VALUE") 6674 value = self._parse_bitwise() 6675 6676 if not key and not value: 6677 return None 6678 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6679 6680 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6681 if not this or not self._match_text_seq("FORMAT", "JSON"): 6682 return this 6683 6684 return self.expression(exp.FormatJson, this=this) 6685 6686 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6687 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6688 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6689 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6690 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6691 else: 6692 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6693 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6694 6695 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6696 6697 if not empty and not error and not null: 6698 return None 6699 6700 return self.expression( 6701 exp.OnCondition, 6702 empty=empty, 6703 error=error, 6704 null=null, 6705 ) 6706 6707 def _parse_on_handling( 6708 self, on: str, *values: str 6709 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6710 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6711 for value in values: 6712 if self._match_text_seq(value, "ON", on): 6713 return f"{value} ON {on}" 6714 6715 index = self._index 6716 if self._match(TokenType.DEFAULT): 6717 default_value = self._parse_bitwise() 6718 if self._match_text_seq("ON", on): 6719 return default_value 6720 6721 self._retreat(index) 6722 6723 return None 6724 6725 @t.overload 6726 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6727 6728 @t.overload 6729 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6730 6731 def _parse_json_object(self, agg=False): 6732 star = self._parse_star() 6733 expressions = ( 6734 [star] 6735 if star 6736 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6737 ) 6738 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6739 6740 unique_keys = None 6741 if self._match_text_seq("WITH", "UNIQUE"): 6742 unique_keys = True 6743 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6744 unique_keys = False 6745 6746 self._match_text_seq("KEYS") 6747 6748 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6749 self._parse_type() 6750 ) 6751 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6752 6753 return self.expression( 6754 exp.JSONObjectAgg if agg else exp.JSONObject, 6755 expressions=expressions, 6756 null_handling=null_handling, 6757 unique_keys=unique_keys, 6758 return_type=return_type, 6759 encoding=encoding, 6760 ) 6761 6762 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6763 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6764 if not self._match_text_seq("NESTED"): 6765 this = self._parse_id_var() 6766 kind = self._parse_types(allow_identifiers=False) 6767 nested = None 6768 else: 6769 this = None 6770 kind = None 6771 nested = True 6772 6773 path = self._match_text_seq("PATH") and self._parse_string() 6774 nested_schema = nested and self._parse_json_schema() 6775 6776 return self.expression( 6777 exp.JSONColumnDef, 6778 this=this, 6779 kind=kind, 6780 path=path, 6781 nested_schema=nested_schema, 6782 ) 6783 6784 def _parse_json_schema(self) -> exp.JSONSchema: 6785 self._match_text_seq("COLUMNS") 6786 return self.expression( 6787 exp.JSONSchema, 6788 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6789 ) 6790 6791 def _parse_json_table(self) -> exp.JSONTable: 6792 this = self._parse_format_json(self._parse_bitwise()) 6793 path = self._match(TokenType.COMMA) and self._parse_string() 6794 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6795 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6796 schema = 
self._parse_json_schema() 6797 6798 return exp.JSONTable( 6799 this=this, 6800 schema=schema, 6801 path=path, 6802 error_handling=error_handling, 6803 empty_handling=empty_handling, 6804 ) 6805 6806 def _parse_match_against(self) -> exp.MatchAgainst: 6807 expressions = self._parse_csv(self._parse_column) 6808 6809 self._match_text_seq(")", "AGAINST", "(") 6810 6811 this = self._parse_string() 6812 6813 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6814 modifier = "IN NATURAL LANGUAGE MODE" 6815 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6816 modifier = f"{modifier} WITH QUERY EXPANSION" 6817 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6818 modifier = "IN BOOLEAN MODE" 6819 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6820 modifier = "WITH QUERY EXPANSION" 6821 else: 6822 modifier = None 6823 6824 return self.expression( 6825 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6826 ) 6827 6828 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6829 def _parse_open_json(self) -> exp.OpenJSON: 6830 this = self._parse_bitwise() 6831 path = self._match(TokenType.COMMA) and self._parse_string() 6832 6833 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6834 this = self._parse_field(any_token=True) 6835 kind = self._parse_types() 6836 path = self._parse_string() 6837 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6838 6839 return self.expression( 6840 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6841 ) 6842 6843 expressions = None 6844 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6845 self._match_l_paren() 6846 expressions = self._parse_csv(_parse_open_json_column_def) 6847 6848 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6849 6850 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6851 args = self._parse_csv(self._parse_bitwise) 6852 6853 if self._match(TokenType.IN): 6854 return self.expression( 6855 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6856 ) 6857 6858 if haystack_first: 6859 haystack = seq_get(args, 0) 6860 needle = seq_get(args, 1) 6861 else: 6862 haystack = seq_get(args, 1) 6863 needle = seq_get(args, 0) 6864 6865 return self.expression( 6866 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6867 ) 6868 6869 def _parse_predict(self) -> exp.Predict: 6870 self._match_text_seq("MODEL") 6871 this = self._parse_table() 6872 6873 self._match(TokenType.COMMA) 6874 self._match_text_seq("TABLE") 6875 6876 return self.expression( 6877 exp.Predict, 6878 this=this, 6879 expression=self._parse_table(), 6880 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6881 ) 6882 6883 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6884 args = self._parse_csv(self._parse_table) 6885 return exp.JoinHint(this=func_name.upper(), expressions=args) 6886 6887 def _parse_substring(self) -> exp.Substring: 6888 # Postgres supports the form: substring(string [from int] [for int]) 6889 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6890 6891 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6892 6893 if self._match(TokenType.FROM): 6894 args.append(self._parse_bitwise()) 6895 if self._match(TokenType.FOR): 6896 if len(args) == 1: 6897 args.append(exp.Literal.number(1)) 6898 args.append(self._parse_bitwise()) 6899 6900 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 6901 6902 def _parse_trim(self) -> exp.Trim: 6903 # https://www.w3resource.com/sql/character-functions/trim.php 6904 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6905 6906 position = None 6907 collation = None 6908 expression = None 6909 6910 if self._match_texts(self.TRIM_TYPES): 6911 position = self._prev.text.upper() 6912 6913 this = self._parse_bitwise() 6914 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6915 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6916 expression = self._parse_bitwise() 6917 6918 if invert_order: 6919 this, expression = expression, this 6920 6921 if self._match(TokenType.COLLATE): 6922 collation = self._parse_bitwise() 6923 6924 return self.expression( 6925 exp.Trim, this=this, position=position, expression=expression, collation=collation 6926 ) 6927 6928 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6929 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6930 6931 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6932 return self._parse_window(self._parse_id_var(), alias=True) 6933 6934 def _parse_respect_or_ignore_nulls( 6935 self, this: t.Optional[exp.Expression] 6936 ) -> t.Optional[exp.Expression]: 6937 if self._match_text_seq("IGNORE", "NULLS"): 6938 return self.expression(exp.IgnoreNulls, this=this) 6939 if self._match_text_seq("RESPECT", "NULLS"): 6940 return self.expression(exp.RespectNulls, this=this) 6941 return this 6942 6943 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6944 if self._match(TokenType.HAVING): 6945 self._match_texts(("MAX", "MIN")) 6946 max = self._prev.text.upper() != "MIN" 6947 return self.expression( 6948 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6949 ) 6950 6951 return this 6952 6953 def _parse_window( 6954 self, this: t.Optional[exp.Expression], alias: bool = False 6955 ) -> t.Optional[exp.Expression]: 6956 func = this 6957 comments = func.comments if isinstance(func, exp.Expression) else None 6958 6959 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6960 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6961 if self._match_text_seq("WITHIN", "GROUP"): 6962 order = self._parse_wrapped(self._parse_order) 6963 this = self.expression(exp.WithinGroup, this=this, expression=order) 6964 6965 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6966 self._match(TokenType.WHERE) 6967 this = self.expression( 6968 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6969 ) 6970 self._match_r_paren() 6971 6972 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6973 # Some dialects choose to implement and some do not. 6974 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6975 6976 # There is some code above in _parse_lambda that handles 6977 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6978 6979 # The below changes handle 6980 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6981 6982 # Oracle allows both formats 6983 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6984 # and Snowflake chose to do the same for familiarity 6985 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6986 if isinstance(this, exp.AggFunc): 6987 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6988 6989 if ignore_respect and ignore_respect is not this: 6990 ignore_respect.replace(ignore_respect.this) 6991 this = self.expression(ignore_respect.__class__, this=this) 6992 6993 this = self._parse_respect_or_ignore_nulls(this) 6994 6995 # bigquery select from window x AS (partition by ...) 6996 if alias: 6997 over = None 6998 self._match(TokenType.ALIAS) 6999 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7000 return this 7001 else: 7002 over = self._prev.text.upper() 7003 7004 if comments and isinstance(func, exp.Expression): 7005 func.pop_comments() 7006 7007 if not self._match(TokenType.L_PAREN): 7008 return self.expression( 7009 exp.Window, 7010 comments=comments, 7011 this=this, 7012 alias=self._parse_id_var(False), 7013 over=over, 7014 ) 7015 7016 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7017 7018 first = self._match(TokenType.FIRST) 7019 if self._match_text_seq("LAST"): 7020 first = False 7021 7022 partition, order = self._parse_partition_and_order() 7023 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7024 7025 if kind: 7026 self._match(TokenType.BETWEEN) 7027 start = self._parse_window_spec() 7028 self._match(TokenType.AND) 7029 end = self._parse_window_spec() 7030 exclude = ( 7031 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7032 if self._match_text_seq("EXCLUDE") 7033 else None 7034 ) 7035 7036 spec = self.expression( 7037 exp.WindowSpec, 7038 kind=kind, 7039 start=start["value"], 7040 start_side=start["side"], 7041 end=end["value"], 7042 end_side=end["side"], 7043 exclude=exclude, 7044 ) 7045 else: 7046 spec = None 7047 7048 self._match_r_paren() 7049 7050 window = self.expression( 7051 exp.Window, 7052 comments=comments, 7053 this=this, 7054 partition_by=partition, 7055 order=order, 7056 spec=spec, 7057 alias=window_alias, 7058 over=over, 7059 first=first, 7060 ) 7061 7062 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
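        # e.g. (illustrative) MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hiredate) OVER (PARTITION BY deptno)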
7063 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7064 return self._parse_window(window, alias=alias) 7065 7066 return window 7067 7068 def _parse_partition_and_order( 7069 self, 7070 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7071 return self._parse_partition_by(), self._parse_order() 7072 7073 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7074 self._match(TokenType.BETWEEN) 7075 7076 return { 7077 "value": ( 7078 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7079 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7080 or self._parse_bitwise() 7081 ), 7082 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7083 } 7084 7085 def _parse_alias( 7086 self, this: t.Optional[exp.Expression], explicit: bool = False 7087 ) -> t.Optional[exp.Expression]: 7088 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7089 # so this section tries to parse the clause version and if it fails, it treats the token 7090 # as an identifier (alias) 7091 if self._can_parse_limit_or_offset(): 7092 return this 7093 7094 any_token = self._match(TokenType.ALIAS) 7095 comments = self._prev_comments or [] 7096 7097 if explicit and not any_token: 7098 return this 7099 7100 if self._match(TokenType.L_PAREN): 7101 aliases = self.expression( 7102 exp.Aliases, 7103 comments=comments, 7104 this=this, 7105 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7106 ) 7107 self._match_r_paren(aliases) 7108 return aliases 7109 7110 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7111 self.STRING_ALIASES and self._parse_string_as_identifier() 7112 ) 7113 7114 if alias: 7115 comments.extend(alias.pop_comments()) 7116 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7117 column = this.this 7118 7119 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7120 if not this.comments and column and column.comments: 7121 this.comments = column.pop_comments() 7122 7123 return this 7124 7125 def _parse_id_var( 7126 self, 7127 any_token: bool = True, 7128 tokens: t.Optional[t.Collection[TokenType]] = None, 7129 ) -> t.Optional[exp.Expression]: 7130 expression = self._parse_identifier() 7131 if not expression and ( 7132 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7133 ): 7134 quoted = self._prev.token_type == TokenType.STRING 7135 expression = self._identifier_expression(quoted=quoted) 7136 7137 return expression 7138 7139 def _parse_string(self) -> t.Optional[exp.Expression]: 7140 if self._match_set(self.STRING_PARSERS): 7141 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7142 return self._parse_placeholder() 7143 7144 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7145 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7146 if output: 7147 output.update_positions(self._prev) 7148 return output 7149 7150 def _parse_number(self) -> t.Optional[exp.Expression]: 7151 if self._match_set(self.NUMERIC_PARSERS): 7152 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7153 return self._parse_placeholder() 7154 7155 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7156 if self._match(TokenType.IDENTIFIER): 7157 return self._identifier_expression(quoted=True) 7158 return self._parse_placeholder() 7159 7160 def _parse_var( 7161 self, 7162 any_token: bool = False, 7163 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7164 upper: bool = False, 7165 ) -> t.Optional[exp.Expression]: 7166 if ( 7167 (any_token and self._advance_any()) 7168 or self._match(TokenType.VAR) 7169 or (self._match_set(tokens) if tokens else False) 7170 ): 7171 return self.expression( 7172 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7173 ) 7174 return self._parse_placeholder() 7175 7176 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7177 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7178 self._advance() 7179 return self._prev 7180 return None 7181 7182 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7183 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7184 7185 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7186 return self._parse_primary() or self._parse_var(any_token=True) 7187 7188 def _parse_null(self) -> t.Optional[exp.Expression]: 7189 if self._match_set(self.NULL_TOKENS): 7190 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7191 return self._parse_placeholder() 7192 7193 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7194 if self._match(TokenType.TRUE): 7195 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7196 if self._match(TokenType.FALSE): 7197 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7198 return self._parse_placeholder() 7199 7200 def _parse_star(self) -> t.Optional[exp.Expression]: 7201 if self._match(TokenType.STAR): 7202 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7203 return self._parse_placeholder() 7204 7205 def _parse_parameter(self) -> exp.Parameter: 7206 this = self._parse_identifier() or self._parse_primary_or_var() 7207 return self.expression(exp.Parameter, this=this) 7208 7209 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7210 if self._match_set(self.PLACEHOLDER_PARSERS): 7211 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7212 if placeholder: 7213 return placeholder 7214 self._advance(-1) 7215 return None 7216 7217 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7218 if not self._match_texts(keywords): 7219 return None 7220 if self._match(TokenType.L_PAREN, advance=False): 7221 return self._parse_wrapped_csv(self._parse_expression) 7222 7223 expression = self._parse_expression() 7224 return [expression] if expression else None 7225 7226 def _parse_csv( 7227 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7228 ) -> t.List[exp.Expression]: 7229 parse_result = parse_method() 7230 items = [parse_result] if parse_result is not None else [] 7231 7232 while self._match(sep): 7233 self._add_comments(parse_result) 7234 parse_result = parse_method() 7235 if parse_result is not None: 7236 items.append(parse_result) 7237 7238 return items 7239 7240 def _parse_tokens( 7241 self, parse_method: t.Callable, expressions: t.Dict 7242 ) -> t.Optional[exp.Expression]: 7243 this = parse_method() 7244 7245 while self._match_set(expressions): 7246 this = self.expression( 7247 expressions[self._prev.token_type], 7248 this=this, 7249 comments=self._prev_comments, 7250 expression=parse_method(), 7251 ) 7252 7253 return this 7254 7255 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7256 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7257 7258 def _parse_wrapped_csv( 7259 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7260 ) -> t.List[exp.Expression]: 7261 return self._parse_wrapped( 7262 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7263 ) 7264 7265 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7266 wrapped = self._match(TokenType.L_PAREN) 7267 if not wrapped and not optional: 7268 self.raise_error("Expecting (") 7269 parse_result = parse_method() 7270 if wrapped: 7271 self._match_r_paren() 7272 return parse_result 7273 7274 def _parse_expressions(self) -> t.List[exp.Expression]: 7275 return self._parse_csv(self._parse_expression) 7276 7277 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7278 return self._parse_select() or self._parse_set_operations( 7279 self._parse_alias(self._parse_assignment(), explicit=True) 7280 if alias 7281 else self._parse_assignment() 7282 ) 7283 7284 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7285 return self._parse_query_modifiers( 7286 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7287 ) 7288 7289 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7290 this = None 7291 if self._match_texts(self.TRANSACTION_KIND): 7292 this = self._prev.text 7293 7294 self._match_texts(("TRANSACTION", "WORK")) 7295 7296 modes = [] 7297 while True: 7298 mode = [] 7299 while self._match(TokenType.VAR): 7300 mode.append(self._prev.text) 7301 7302 if mode: 7303 modes.append(" ".join(mode)) 7304 if not self._match(TokenType.COMMA): 7305 break 7306 7307 return self.expression(exp.Transaction, this=this, modes=modes) 7308 7309 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7310 chain = None 7311 savepoint = None 7312 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7313 7314 self._match_texts(("TRANSACTION", "WORK")) 7315 7316 if self._match_text_seq("TO"): 7317 self._match_text_seq("SAVEPOINT") 7318 savepoint = self._parse_id_var() 7319 7320 if self._match(TokenType.AND): 7321 chain = not self._match_text_seq("NO") 7322 self._match_text_seq("CHAIN") 7323 7324 if is_rollback: 7325 return self.expression(exp.Rollback, savepoint=savepoint) 7326 7327 return self.expression(exp.Commit, chain=chain) 7328 7329 def _parse_refresh(self) -> exp.Refresh: 7330 self._match(TokenType.TABLE) 7331 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7332 7333 def _parse_column_def_with_exists(self): 7334 start = self._index 7335 self._match(TokenType.COLUMN) 7336 7337 exists_column = self._parse_exists(not_=True) 7338 expression = self._parse_field_def() 7339 7340 if not isinstance(expression, exp.ColumnDef): 7341 self._retreat(start) 7342 return None 7343 7344 expression.set("exists", exists_column) 7345 7346 return expression 7347 7348 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7349 if not self._prev.text.upper() == "ADD": 7350 return None 7351 7352 expression = self._parse_column_def_with_exists() 7353 if not expression: 7354 return None 7355 7356 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7357 if self._match_texts(("FIRST", "AFTER")): 7358 position = self._prev.text 7359 column_position = self.expression( 7360 exp.ColumnPosition, this=self._parse_column(), position=position 7361 ) 7362 expression.set("position", column_position) 7363 7364 return expression 7365 7366 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7367 drop = 
self._match(TokenType.DROP) and self._parse_drop() 7368 if drop and not isinstance(drop, exp.Command): 7369 drop.set("kind", drop.args.get("kind", "COLUMN")) 7370 return drop 7371 7372 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7373 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7374 return self.expression( 7375 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7376 ) 7377 7378 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7379 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7380 self._match_text_seq("ADD") 7381 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7382 return self.expression( 7383 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7384 ) 7385 7386 column_def = self._parse_add_column() 7387 if isinstance(column_def, exp.ColumnDef): 7388 return column_def 7389 7390 exists = self._parse_exists(not_=True) 7391 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7392 return self.expression( 7393 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7394 ) 7395 7396 return None 7397 7398 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7399 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7400 or self._match_text_seq("COLUMNS") 7401 ): 7402 schema = self._parse_schema() 7403 7404 return ( 7405 ensure_list(schema) 7406 if schema 7407 else self._parse_csv(self._parse_column_def_with_exists) 7408 ) 7409 7410 return self._parse_csv(_parse_add_alteration) 7411 7412 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7413 if self._match_texts(self.ALTER_ALTER_PARSERS): 7414 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7415 7416 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7417 # keyword after ALTER we default to parsing this statement 7418 self._match(TokenType.COLUMN) 7419 column = self._parse_field(any_token=True) 7420 7421 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7422 return self.expression(exp.AlterColumn, this=column, drop=True) 7423 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7424 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7425 if self._match(TokenType.COMMENT): 7426 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7427 if self._match_text_seq("DROP", "NOT", "NULL"): 7428 return self.expression( 7429 exp.AlterColumn, 7430 this=column, 7431 drop=True, 7432 allow_null=True, 7433 ) 7434 if self._match_text_seq("SET", "NOT", "NULL"): 7435 return self.expression( 7436 exp.AlterColumn, 7437 this=column, 7438 allow_null=False, 7439 ) 7440 7441 if self._match_text_seq("SET", "VISIBLE"): 7442 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7443 if self._match_text_seq("SET", "INVISIBLE"): 7444 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7445 7446 self._match_text_seq("SET", "DATA") 7447 self._match_text_seq("TYPE") 7448 return self.expression( 7449 exp.AlterColumn, 7450 this=column, 7451 dtype=self._parse_types(), 7452 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7453 using=self._match(TokenType.USING) and self._parse_assignment(), 7454 ) 7455 7456 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7457 if self._match_texts(("ALL", "EVEN", "AUTO")): 7458 return self.expression(exp.AlterDistStyle, 
this=exp.var(self._prev.text.upper())) 7459 7460 self._match_text_seq("KEY", "DISTKEY") 7461 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7462 7463 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7464 if compound: 7465 self._match_text_seq("SORTKEY") 7466 7467 if self._match(TokenType.L_PAREN, advance=False): 7468 return self.expression( 7469 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7470 ) 7471 7472 self._match_texts(("AUTO", "NONE")) 7473 return self.expression( 7474 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7475 ) 7476 7477 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7478 index = self._index - 1 7479 7480 partition_exists = self._parse_exists() 7481 if self._match(TokenType.PARTITION, advance=False): 7482 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7483 7484 self._retreat(index) 7485 return self._parse_csv(self._parse_drop_column) 7486 7487 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7488 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7489 exists = self._parse_exists() 7490 old_column = self._parse_column() 7491 to = self._match_text_seq("TO") 7492 new_column = self._parse_column() 7493 7494 if old_column is None or to is None or new_column is None: 7495 return None 7496 7497 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7498 7499 self._match_text_seq("TO") 7500 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7501 7502 def _parse_alter_table_set(self) -> exp.AlterSet: 7503 alter_set = self.expression(exp.AlterSet) 7504 7505 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7506 "TABLE", "PROPERTIES" 7507 ): 7508 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7509 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7510 alter_set.set("expressions", [self._parse_assignment()]) 7511 elif self._match_texts(("LOGGED", "UNLOGGED")): 7512 alter_set.set("option", exp.var(self._prev.text.upper())) 7513 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7514 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7515 elif self._match_text_seq("LOCATION"): 7516 alter_set.set("location", self._parse_field()) 7517 elif self._match_text_seq("ACCESS", "METHOD"): 7518 alter_set.set("access_method", self._parse_field()) 7519 elif self._match_text_seq("TABLESPACE"): 7520 alter_set.set("tablespace", self._parse_field()) 7521 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7522 alter_set.set("file_format", [self._parse_field()]) 7523 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7524 alter_set.set("file_format", self._parse_wrapped_options()) 7525 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7526 alter_set.set("copy_options", self._parse_wrapped_options()) 7527 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7528 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7529 else: 7530 if self._match_text_seq("SERDE"): 7531 alter_set.set("serde", self._parse_field()) 7532 7533 properties = self._parse_wrapped(self._parse_properties, optional=True) 7534 alter_set.set("expressions", [properties]) 7535 7536 return alter_set 7537 7538 def _parse_alter(self) -> exp.Alter | exp.Command: 7539 start = self._prev 7540 
7541 alter_token = self._match_set(self.ALTERABLES) and self._prev 7542 if not alter_token: 7543 return self._parse_as_command(start) 7544 7545 exists = self._parse_exists() 7546 only = self._match_text_seq("ONLY") 7547 this = self._parse_table(schema=True) 7548 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7549 7550 if self._next: 7551 self._advance() 7552 7553 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7554 if parser: 7555 actions = ensure_list(parser(self)) 7556 not_valid = self._match_text_seq("NOT", "VALID") 7557 options = self._parse_csv(self._parse_property) 7558 7559 if not self._curr and actions: 7560 return self.expression( 7561 exp.Alter, 7562 this=this, 7563 kind=alter_token.text.upper(), 7564 exists=exists, 7565 actions=actions, 7566 only=only, 7567 options=options, 7568 cluster=cluster, 7569 not_valid=not_valid, 7570 ) 7571 7572 return self._parse_as_command(start) 7573 7574 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7575 start = self._prev 7576 # https://duckdb.org/docs/sql/statements/analyze 7577 if not self._curr: 7578 return self.expression(exp.Analyze) 7579 7580 options = [] 7581 while self._match_texts(self.ANALYZE_STYLES): 7582 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7583 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7584 else: 7585 options.append(self._prev.text.upper()) 7586 7587 this: t.Optional[exp.Expression] = None 7588 inner_expression: t.Optional[exp.Expression] = None 7589 7590 kind = self._curr and self._curr.text.upper() 7591 7592 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7593 this = self._parse_table_parts() 7594 elif self._match_text_seq("TABLES"): 7595 if self._match_set((TokenType.FROM, TokenType.IN)): 7596 kind = f"{kind} {self._prev.text.upper()}" 7597 this = self._parse_table(schema=True, is_db_reference=True) 7598 elif self._match_text_seq("DATABASE"): 7599 this = self._parse_table(schema=True, is_db_reference=True) 7600 elif self._match_text_seq("CLUSTER"): 7601 this = self._parse_table() 7602 # Try matching inner expr keywords before fallback to parse table. 
7603 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7604 kind = None 7605 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7606 else: 7607 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7608 kind = None 7609 this = self._parse_table_parts() 7610 7611 partition = self._try_parse(self._parse_partition) 7612 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7613 return self._parse_as_command(start) 7614 7615 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7616 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7617 "WITH", "ASYNC", "MODE" 7618 ): 7619 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7620 else: 7621 mode = None 7622 7623 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7624 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7625 7626 properties = self._parse_properties() 7627 return self.expression( 7628 exp.Analyze, 7629 kind=kind, 7630 this=this, 7631 mode=mode, 7632 partition=partition, 7633 properties=properties, 7634 expression=inner_expression, 7635 options=options, 7636 ) 7637 7638 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7639 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7640 this = None 7641 kind = self._prev.text.upper() 7642 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7643 expressions = [] 7644 7645 if not self._match_text_seq("STATISTICS"): 7646 self.raise_error("Expecting token STATISTICS") 7647 7648 if self._match_text_seq("NOSCAN"): 7649 this = "NOSCAN" 7650 elif self._match(TokenType.FOR): 7651 if self._match_text_seq("ALL", "COLUMNS"): 7652 this = "FOR ALL COLUMNS" 7653 if self._match_texts("COLUMNS"): 7654 this = "FOR COLUMNS" 7655 expressions = self._parse_csv(self._parse_column_reference) 7656 elif self._match_text_seq("SAMPLE"): 7657 sample = self._parse_number() 7658 expressions = [ 7659 self.expression( 7660 exp.AnalyzeSample, 7661 sample=sample, 7662 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7663 ) 7664 ] 7665 7666 return self.expression( 7667 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7668 ) 7669 7670 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7671 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7672 kind = None 7673 this = None 7674 expression: t.Optional[exp.Expression] = None 7675 if self._match_text_seq("REF", "UPDATE"): 7676 kind = "REF" 7677 this = "UPDATE" 7678 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7679 this = "UPDATE SET DANGLING TO NULL" 7680 elif self._match_text_seq("STRUCTURE"): 7681 kind = "STRUCTURE" 7682 if self._match_text_seq("CASCADE", "FAST"): 7683 this = "CASCADE FAST" 7684 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7685 ("ONLINE", "OFFLINE") 7686 ): 7687 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7688 expression = self._parse_into() 7689 7690 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7691 7692 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7693 this = self._prev.text.upper() 7694 if self._match_text_seq("COLUMNS"): 7695 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7696 return None 7697 7698 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7699 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7700 if self._match_text_seq("STATISTICS"): 7701 return self.expression(exp.AnalyzeDelete, kind=kind) 7702 return None 7703 7704 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7705 if self._match_text_seq("CHAINED", "ROWS"): 7706 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7707 return None 7708 7709 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7710 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7711 this = self._prev.text.upper() 7712 expression: t.Optional[exp.Expression] = None 7713 expressions = [] 7714 update_options = None 7715 7716 if self._match_text_seq("HISTOGRAM", "ON"): 7717 expressions = self._parse_csv(self._parse_column_reference) 7718 with_expressions = [] 7719 while self._match(TokenType.WITH): 7720 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7721 if self._match_texts(("SYNC", "ASYNC")): 7722 if self._match_text_seq("MODE", advance=False): 7723 with_expressions.append(f"{self._prev.text.upper()} MODE") 7724 self._advance() 7725 else: 7726 buckets = self._parse_number() 7727 if self._match_text_seq("BUCKETS"): 7728 with_expressions.append(f"{buckets} BUCKETS") 7729 if with_expressions: 7730 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7731 7732 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7733 TokenType.UPDATE, advance=False 7734 ): 7735 update_options = self._prev.text.upper() 7736 self._advance() 7737 elif self._match_text_seq("USING", "DATA"): 7738 expression = self.expression(exp.UsingData, this=self._parse_string()) 7739 7740 return self.expression( 7741 exp.AnalyzeHistogram, 7742 this=this, 7743 expressions=expressions, 7744 expression=expression, 7745 update_options=update_options, 7746 ) 7747 7748 def _parse_merge(self) -> exp.Merge: 7749 self._match(TokenType.INTO) 7750 target = self._parse_table() 7751 7752 if target and self._match(TokenType.ALIAS, advance=False): 7753 target.set("alias", self._parse_table_alias()) 7754 7755 self._match(TokenType.USING) 7756 using = self._parse_table() 7757 7758 self._match(TokenType.ON) 7759 on = self._parse_assignment() 7760 7761 return self.expression( 7762 exp.Merge, 7763 this=target, 7764 using=using, 7765 on=on, 7766 whens=self._parse_when_matched(), 7767 returning=self._parse_returning(), 7768 ) 7769 7770 def _parse_when_matched(self) -> exp.Whens: 7771 whens = [] 7772 7773 while self._match(TokenType.WHEN): 7774 matched = not self._match(TokenType.NOT) 7775 self._match_text_seq("MATCHED") 7776 source = ( 7777 False 7778 if self._match_text_seq("BY", "TARGET") 7779 else self._match_text_seq("BY", "SOURCE") 7780 ) 7781 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7782 7783 self._match(TokenType.THEN) 7784 7785 if self._match(TokenType.INSERT): 7786 this = self._parse_star() 7787 if this: 7788 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7789 else: 7790 then = self.expression( 7791 exp.Insert, 7792 this=exp.var("ROW") 7793 if self._match_text_seq("ROW") 7794 else self._parse_value(values=False), 7795 expression=self._match_text_seq("VALUES") and self._parse_value(), 7796 ) 7797 elif self._match(TokenType.UPDATE): 7798 expressions = self._parse_star() 7799 if expressions: 7800 then = self.expression(exp.Update, expressions=expressions) 7801 else: 7802 then = self.expression( 7803 exp.Update, 7804 
expressions=self._match(TokenType.SET) 7805 and self._parse_csv(self._parse_equality), 7806 ) 7807 elif self._match(TokenType.DELETE): 7808 then = self.expression(exp.Var, this=self._prev.text) 7809 else: 7810 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7811 7812 whens.append( 7813 self.expression( 7814 exp.When, 7815 matched=matched, 7816 source=source, 7817 condition=condition, 7818 then=then, 7819 ) 7820 ) 7821 return self.expression(exp.Whens, expressions=whens) 7822 7823 def _parse_show(self) -> t.Optional[exp.Expression]: 7824 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7825 if parser: 7826 return parser(self) 7827 return self._parse_as_command(self._prev) 7828 7829 def _parse_set_item_assignment( 7830 self, kind: t.Optional[str] = None 7831 ) -> t.Optional[exp.Expression]: 7832 index = self._index 7833 7834 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7835 return self._parse_set_transaction(global_=kind == "GLOBAL") 7836 7837 left = self._parse_primary() or self._parse_column() 7838 assignment_delimiter = self._match_texts(("=", "TO")) 7839 7840 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7841 self._retreat(index) 7842 return None 7843 7844 right = self._parse_statement() or self._parse_id_var() 7845 if isinstance(right, (exp.Column, exp.Identifier)): 7846 right = exp.var(right.name) 7847 7848 this = self.expression(exp.EQ, this=left, expression=right) 7849 return self.expression(exp.SetItem, this=this, kind=kind) 7850 7851 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7852 self._match_text_seq("TRANSACTION") 7853 characteristics = self._parse_csv( 7854 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7855 ) 7856 return self.expression( 7857 exp.SetItem, 7858 expressions=characteristics, 7859 kind="TRANSACTION", 7860 **{"global": global_}, # type: ignore 7861 ) 7862 7863 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7864 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7865 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7866 7867 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7868 index = self._index 7869 set_ = self.expression( 7870 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7871 ) 7872 7873 if self._curr: 7874 self._retreat(index) 7875 return self._parse_as_command(self._prev) 7876 7877 return set_ 7878 7879 def _parse_var_from_options( 7880 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7881 ) -> t.Optional[exp.Var]: 7882 start = self._curr 7883 if not start: 7884 return None 7885 7886 option = start.text.upper() 7887 continuations = options.get(option) 7888 7889 index = self._index 7890 self._advance() 7891 for keywords in continuations or []: 7892 if isinstance(keywords, str): 7893 keywords = (keywords,) 7894 7895 if self._match_text_seq(*keywords): 7896 option = f"{option} {' '.join(keywords)}" 7897 break 7898 else: 7899 if continuations or continuations is None: 7900 if raise_unmatched: 7901 self.raise_error(f"Unknown option {option}") 7902 7903 self._retreat(index) 7904 return None 7905 7906 return exp.var(option) 7907 7908 def _parse_as_command(self, start: Token) -> exp.Command: 7909 while self._curr: 7910 self._advance() 7911 text = self._find_sql(start, self._prev) 7912 size = len(start.text) 7913 self._warn_unsupported() 7914 return exp.Command(this=text[:size], 
expression=text[size:]) 7915 7916 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7917 settings = [] 7918 7919 self._match_l_paren() 7920 kind = self._parse_id_var() 7921 7922 if self._match(TokenType.L_PAREN): 7923 while True: 7924 key = self._parse_id_var() 7925 value = self._parse_primary() 7926 if not key and value is None: 7927 break 7928 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7929 self._match(TokenType.R_PAREN) 7930 7931 self._match_r_paren() 7932 7933 return self.expression( 7934 exp.DictProperty, 7935 this=this, 7936 kind=kind.this if kind else None, 7937 settings=settings, 7938 ) 7939 7940 def _parse_dict_range(self, this: str) -> exp.DictRange: 7941 self._match_l_paren() 7942 has_min = self._match_text_seq("MIN") 7943 if has_min: 7944 min = self._parse_var() or self._parse_primary() 7945 self._match_text_seq("MAX") 7946 max = self._parse_var() or self._parse_primary() 7947 else: 7948 max = self._parse_var() or self._parse_primary() 7949 min = exp.Literal.number(0) 7950 self._match_r_paren() 7951 return self.expression(exp.DictRange, this=this, min=min, max=max) 7952 7953 def _parse_comprehension( 7954 self, this: t.Optional[exp.Expression] 7955 ) -> t.Optional[exp.Comprehension]: 7956 index = self._index 7957 expression = self._parse_column() 7958 if not self._match(TokenType.IN): 7959 self._retreat(index - 1) 7960 return None 7961 iterator = self._parse_column() 7962 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7963 return self.expression( 7964 exp.Comprehension, 7965 this=this, 7966 expression=expression, 7967 iterator=iterator, 7968 condition=condition, 7969 ) 7970 7971 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7972 if self._match(TokenType.HEREDOC_STRING): 7973 return self.expression(exp.Heredoc, this=self._prev.text) 7974 7975 if not self._match_text_seq("$"): 7976 return None 7977 7978 tags = ["$"] 7979 tag_text = None 7980 7981 if self._is_connected(): 7982 self._advance() 7983 tags.append(self._prev.text.upper()) 7984 else: 7985 self.raise_error("No closing $ found") 7986 7987 if tags[-1] != "$": 7988 if self._is_connected() and self._match_text_seq("$"): 7989 tag_text = tags[-1] 7990 tags.append("$") 7991 else: 7992 self.raise_error("No closing $ found") 7993 7994 heredoc_start = self._curr 7995 7996 while self._curr: 7997 if self._match_text_seq(*tags, advance=False): 7998 this = self._find_sql(heredoc_start, self._prev) 7999 self._advance(len(tags)) 8000 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8001 8002 self._advance() 8003 8004 self.raise_error(f"No closing {''.join(tags)} found") 8005 return None 8006 8007 def _find_parser( 8008 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8009 ) -> t.Optional[t.Callable]: 8010 if not self._curr: 8011 return None 8012 8013 index = self._index 8014 this = [] 8015 while True: 8016 # The current token might be multiple words 8017 curr = self._curr.text.upper() 8018 key = curr.split(" ") 8019 this.append(curr) 8020 8021 self._advance() 8022 result, trie = in_trie(trie, key) 8023 if result == TrieResult.FAILED: 8024 break 8025 8026 if result == TrieResult.EXISTS: 8027 subparser = parsers[" ".join(this)] 8028 return subparser 8029 8030 self._retreat(index) 8031 return None 8032 8033 def _match(self, token_type, advance=True, expression=None): 8034 if not self._curr: 8035 return None 8036 8037 if self._curr.token_type == token_type: 8038 if advance: 8039 self._advance() 8040 self._add_comments(expression) 8041 return 
True 8042 8043 return None 8044 8045 def _match_set(self, types, advance=True): 8046 if not self._curr: 8047 return None 8048 8049 if self._curr.token_type in types: 8050 if advance: 8051 self._advance() 8052 return True 8053 8054 return None 8055 8056 def _match_pair(self, token_type_a, token_type_b, advance=True): 8057 if not self._curr or not self._next: 8058 return None 8059 8060 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8061 if advance: 8062 self._advance(2) 8063 return True 8064 8065 return None 8066 8067 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8068 if not self._match(TokenType.L_PAREN, expression=expression): 8069 self.raise_error("Expecting (") 8070 8071 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8072 if not self._match(TokenType.R_PAREN, expression=expression): 8073 self.raise_error("Expecting )") 8074 8075 def _match_texts(self, texts, advance=True): 8076 if ( 8077 self._curr 8078 and self._curr.token_type != TokenType.STRING 8079 and self._curr.text.upper() in texts 8080 ): 8081 if advance: 8082 self._advance() 8083 return True 8084 return None 8085 8086 def _match_text_seq(self, *texts, advance=True): 8087 index = self._index 8088 for text in texts: 8089 if ( 8090 self._curr 8091 and self._curr.token_type != TokenType.STRING 8092 and self._curr.text.upper() == text 8093 ): 8094 self._advance() 8095 else: 8096 self._retreat(index) 8097 return None 8098 8099 if not advance: 8100 self._retreat(index) 8101 8102 return True 8103 8104 def _replace_lambda( 8105 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8106 ) -> t.Optional[exp.Expression]: 8107 if not node: 8108 return node 8109 8110 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8111 8112 for column in node.find_all(exp.Column): 8113 typ = lambda_types.get(column.parts[0].name) 8114 if typ is not None: 8115 dot_or_id = column.to_dot() if column.table else column.this 8116 8117 if typ: 8118 dot_or_id = self.expression( 8119 exp.Cast, 8120 this=dot_or_id, 8121 to=typ, 8122 ) 8123 8124 parent = column.parent 8125 8126 while isinstance(parent, exp.Dot): 8127 if not isinstance(parent.parent, exp.Dot): 8128 parent.replace(dot_or_id) 8129 break 8130 parent = parent.parent 8131 else: 8132 if column is node: 8133 node = dot_or_id 8134 else: 8135 column.replace(dot_or_id) 8136 return node 8137 8138 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8139 start = self._prev 8140 8141 # Not to be confused with TRUNCATE(number, decimals) function call 8142 if self._match(TokenType.L_PAREN): 8143 self._retreat(self._index - 2) 8144 return self._parse_function() 8145 8146 # Clickhouse supports TRUNCATE DATABASE as well 8147 is_database = self._match(TokenType.DATABASE) 8148 8149 self._match(TokenType.TABLE) 8150 8151 exists = self._parse_exists(not_=False) 8152 8153 expressions = self._parse_csv( 8154 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8155 ) 8156 8157 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8158 8159 if self._match_text_seq("RESTART", "IDENTITY"): 8160 identity = "RESTART" 8161 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8162 identity = "CONTINUE" 8163 else: 8164 identity = None 8165 8166 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8167 option = self._prev.text 8168 else: 8169 option = None 8170 8171 partition = self._parse_partition() 
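        # At this point a statement such as TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE
        # (illustrative) has been fully consumed; anything left over falls back to a Command below.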
8172 8173 # Fallback case 8174 if self._curr: 8175 return self._parse_as_command(start) 8176 8177 return self.expression( 8178 exp.TruncateTable, 8179 expressions=expressions, 8180 is_database=is_database, 8181 exists=exists, 8182 cluster=cluster, 8183 identity=identity, 8184 option=option, 8185 partition=partition, 8186 ) 8187 8188 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8189 this = self._parse_ordered(self._parse_opclass) 8190 8191 if not self._match(TokenType.WITH): 8192 return this 8193 8194 op = self._parse_var(any_token=True) 8195 8196 return self.expression(exp.WithOperator, this=this, op=op) 8197 8198 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8199 self._match(TokenType.EQ) 8200 self._match(TokenType.L_PAREN) 8201 8202 opts: t.List[t.Optional[exp.Expression]] = [] 8203 option: exp.Expression | None 8204 while self._curr and not self._match(TokenType.R_PAREN): 8205 if self._match_text_seq("FORMAT_NAME", "="): 8206 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8207 option = self._parse_format_name() 8208 else: 8209 option = self._parse_property() 8210 8211 if option is None: 8212 self.raise_error("Unable to parse option") 8213 break 8214 8215 opts.append(option) 8216 8217 return opts 8218 8219 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8220 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8221 8222 options = [] 8223 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8224 option = self._parse_var(any_token=True) 8225 prev = self._prev.text.upper() 8226 8227 # Different dialects might separate options and values by white space, "=" and "AS" 8228 self._match(TokenType.EQ) 8229 self._match(TokenType.ALIAS) 8230 8231 param = self.expression(exp.CopyParameter, this=option) 8232 8233 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8234 TokenType.L_PAREN, advance=False 8235 ): 8236 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8237 param.set("expressions", self._parse_wrapped_options()) 8238 elif prev == "FILE_FORMAT": 8239 # T-SQL's external file format case 8240 param.set("expression", self._parse_field()) 8241 else: 8242 param.set("expression", self._parse_unquoted_field()) 8243 8244 options.append(param) 8245 self._match(sep) 8246 8247 return options 8248 8249 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8250 expr = self.expression(exp.Credentials) 8251 8252 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8253 expr.set("storage", self._parse_field()) 8254 if self._match_text_seq("CREDENTIALS"): 8255 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8256 creds = ( 8257 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8258 ) 8259 expr.set("credentials", creds) 8260 if self._match_text_seq("ENCRYPTION"): 8261 expr.set("encryption", self._parse_wrapped_options()) 8262 if self._match_text_seq("IAM_ROLE"): 8263 expr.set("iam_role", self._parse_field()) 8264 if self._match_text_seq("REGION"): 8265 expr.set("region", self._parse_field()) 8266 8267 return expr 8268 8269 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8270 return self._parse_field() 8271 8272 def _parse_copy(self) -> exp.Copy | exp.Command: 8273 start = self._prev 8274 8275 self._match(TokenType.INTO) 8276 8277 this = ( 8278 self._parse_select(nested=True, parse_subquery_alias=False) 8279 if self._match(TokenType.L_PAREN, advance=False) 8280 else self._parse_table(schema=True) 
8281 ) 8282 8283 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8284 8285 files = self._parse_csv(self._parse_file_location) 8286 credentials = self._parse_credentials() 8287 8288 self._match_text_seq("WITH") 8289 8290 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8291 8292 # Fallback case 8293 if self._curr: 8294 return self._parse_as_command(start) 8295 8296 return self.expression( 8297 exp.Copy, 8298 this=this, 8299 kind=kind, 8300 credentials=credentials, 8301 files=files, 8302 params=params, 8303 ) 8304 8305 def _parse_normalize(self) -> exp.Normalize: 8306 return self.expression( 8307 exp.Normalize, 8308 this=self._parse_bitwise(), 8309 form=self._match(TokenType.COMMA) and self._parse_var(), 8310 ) 8311 8312 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8313 args = self._parse_csv(lambda: self._parse_lambda()) 8314 8315 this = seq_get(args, 0) 8316 decimals = seq_get(args, 1) 8317 8318 return expr_type( 8319 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8320 ) 8321 8322 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8323 star_token = self._prev 8324 8325 if self._match_text_seq("COLUMNS", "(", advance=False): 8326 this = self._parse_function() 8327 if isinstance(this, exp.Columns): 8328 this.set("unpack", True) 8329 return this 8330 8331 return self.expression( 8332 exp.Star, 8333 **{ # type: ignore 8334 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8335 "replace": self._parse_star_op("REPLACE"), 8336 "rename": self._parse_star_op("RENAME"), 8337 }, 8338 ).update_positions(star_token) 8339 8340 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8341 privilege_parts = [] 8342 8343 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8344 # (end of privilege list) or L_PAREN (start of column list) are met 8345 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8346 privilege_parts.append(self._curr.text.upper()) 8347 self._advance() 8348 8349 this = exp.var(" ".join(privilege_parts)) 8350 expressions = ( 8351 self._parse_wrapped_csv(self._parse_column) 8352 if self._match(TokenType.L_PAREN, advance=False) 8353 else None 8354 ) 8355 8356 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8357 8358 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8359 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8360 principal = self._parse_id_var() 8361 8362 if not principal: 8363 return None 8364 8365 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8366 8367 def _parse_grant(self) -> exp.Grant | exp.Command: 8368 start = self._prev 8369 8370 privileges = self._parse_csv(self._parse_grant_privilege) 8371 8372 self._match(TokenType.ON) 8373 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8374 8375 # Attempt to parse the securable e.g. 
MySQL allows names 8376 # such as "foo.*", "*.*" which are not easily parseable yet 8377 securable = self._try_parse(self._parse_table_parts) 8378 8379 if not securable or not self._match_text_seq("TO"): 8380 return self._parse_as_command(start) 8381 8382 principals = self._parse_csv(self._parse_grant_principal) 8383 8384 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8385 8386 if self._curr: 8387 return self._parse_as_command(start) 8388 8389 return self.expression( 8390 exp.Grant, 8391 privileges=privileges, 8392 kind=kind, 8393 securable=securable, 8394 principals=principals, 8395 grant_option=grant_option, 8396 ) 8397 8398 def _parse_overlay(self) -> exp.Overlay: 8399 return self.expression( 8400 exp.Overlay, 8401 **{ # type: ignore 8402 "this": self._parse_bitwise(), 8403 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8404 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8405 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8406 }, 8407 ) 8408 8409 def _parse_format_name(self) -> exp.Property: 8410 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8411 # for FILE_FORMAT = <format_name> 8412 return self.expression( 8413 exp.Property, 8414 this=exp.var("FORMAT_NAME"), 8415 value=self._parse_string() or self._parse_table_parts(), 8416 ) 8417 8418 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8419 args: t.List[exp.Expression] = [] 8420 8421 if self._match(TokenType.DISTINCT): 8422 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8423 self._match(TokenType.COMMA) 8424 8425 args.extend(self._parse_csv(self._parse_assignment)) 8426 8427 return self.expression( 8428 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8429 ) 8430 8431 def _identifier_expression( 8432 self, token: t.Optional[Token] = None, **kwargs: t.Any 8433 ) -> exp.Identifier: 8434 token = token or self._prev 8435 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8436 expression.update_positions(token) 8437 return expression 8438 8439 def _build_pipe_cte( 8440 self, 8441 query: exp.Query, 8442 expressions: t.List[exp.Expression], 8443 alias_cte: t.Optional[exp.TableAlias] = None, 8444 ) -> exp.Select: 8445 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8446 if alias_cte: 8447 new_cte = alias_cte 8448 else: 8449 self._pipe_cte_counter += 1 8450 new_cte = f"__tmp{self._pipe_cte_counter}" 8451 8452 with_ = query.args.get("with") 8453 ctes = with_.pop() if with_ else None 8454 8455 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8456 if ctes: 8457 new_select.set("with", ctes) 8458 8459 return new_select.with_(new_cte, as_=query, copy=False) 8460 8461 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8462 select = self._parse_select(consume_pipe=False) 8463 if not select: 8464 return query 8465 8466 return self._build_pipe_cte( 8467 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8468 ) 8469 8470 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8471 limit = self._parse_limit() 8472 offset = self._parse_offset() 8473 if limit: 8474 curr_limit = query.args.get("limit", limit) 8475 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8476 query.limit(limit, copy=False) 8477 if offset: 8478 curr_offset = query.args.get("offset") 8479 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
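            # Offsets from successive pipe operators accumulate, e.g. (illustrative) an existing
            # OFFSET 3 followed by |> LIMIT 10 OFFSET 5 is folded into a single OFFSET 8.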
8480 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8481 8482 return query 8483 8484 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8485 this = self._parse_assignment() 8486 if self._match_text_seq("GROUP", "AND", advance=False): 8487 return this 8488 8489 this = self._parse_alias(this) 8490 8491 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8492 return self._parse_ordered(lambda: this) 8493 8494 return this 8495 8496 def _parse_pipe_syntax_aggregate_group_order_by( 8497 self, query: exp.Select, group_by_exists: bool = True 8498 ) -> exp.Select: 8499 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8500 aggregates_or_groups, orders = [], [] 8501 for element in expr: 8502 if isinstance(element, exp.Ordered): 8503 this = element.this 8504 if isinstance(this, exp.Alias): 8505 element.set("this", this.args["alias"]) 8506 orders.append(element) 8507 else: 8508 this = element 8509 aggregates_or_groups.append(this) 8510 8511 if group_by_exists: 8512 query.select(*aggregates_or_groups, copy=False).group_by( 8513 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8514 copy=False, 8515 ) 8516 else: 8517 query.select(*aggregates_or_groups, append=False, copy=False) 8518 8519 if orders: 8520 return query.order_by(*orders, append=False, copy=False) 8521 8522 return query 8523 8524 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8525 self._match_text_seq("AGGREGATE") 8526 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8527 8528 if self._match(TokenType.GROUP_BY) or ( 8529 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8530 ): 8531 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8532 8533 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8534 8535 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8536 first_setop = self.parse_set_operation(this=query) 8537 if not first_setop: 8538 return None 8539 8540 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8541 expr = self._parse_paren() 8542 return expr.assert_is(exp.Subquery).unnest() if expr else None 8543 8544 first_setop.this.pop() 8545 8546 setops = [ 8547 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8548 *self._parse_csv(_parse_and_unwrap_query), 8549 ] 8550 8551 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8552 with_ = query.args.get("with") 8553 ctes = with_.pop() if with_ else None 8554 8555 if isinstance(first_setop, exp.Union): 8556 query = query.union(*setops, copy=False, **first_setop.args) 8557 elif isinstance(first_setop, exp.Except): 8558 query = query.except_(*setops, copy=False, **first_setop.args) 8559 else: 8560 query = query.intersect(*setops, copy=False, **first_setop.args) 8561 8562 query.set("with", ctes) 8563 8564 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8565 8566 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8567 join = self._parse_join() 8568 if not join: 8569 return None 8570 8571 if isinstance(query, exp.Select): 8572 return query.join(join, copy=False) 8573 8574 return query 8575 8576 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8577 pivots = self._parse_pivots() 8578 if not pivots: 8579 return query 8580 8581 from_ = query.args.get("from") 8582 if from_: 8583 from_.this.set("pivots", pivots) 8584 8585 
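        # Wrap the pivoted query in a new CTE (see _build_pipe_cte above) so later pipe
        # operators build on a plain SELECT over it.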
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8586 8587 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8588 self._match_text_seq("EXTEND") 8589 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8590 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8591 8592 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8593 sample = self._parse_table_sample() 8594 8595 with_ = query.args.get("with") 8596 if with_: 8597 with_.expressions[-1].this.set("sample", sample) 8598 else: 8599 query.set("sample", sample) 8600 8601 return query 8602 8603 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8604 if isinstance(query, exp.Subquery): 8605 query = exp.select("*").from_(query, copy=False) 8606 8607 if not query.args.get("from"): 8608 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8609 8610 while self._match(TokenType.PIPE_GT): 8611 start = self._curr 8612 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8613 if not parser: 8614 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8615 # keywords, making it tricky to disambiguate them without lookahead. The approach 8616 # here is to try and parse a set operation and if that fails, then try to parse a 8617 # join operator. If that fails as well, then the operator is not supported. 8618 parsed_query = self._parse_pipe_syntax_set_operator(query) 8619 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8620 if not parsed_query: 8621 self._retreat(start) 8622 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8623 break 8624 query = parsed_query 8625 else: 8626 query = parser(self, query) 8627 8628 return query 8629 8630 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8631 vars = self._parse_csv(self._parse_id_var) 8632 if not vars: 8633 return None 8634 8635 return self.expression( 8636 exp.DeclareItem, 8637 this=vars, 8638 kind=self._parse_types(), 8639 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8640 ) 8641 8642 def _parse_declare(self) -> exp.Declare | exp.Command: 8643 start = self._prev 8644 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8645 8646 if not expressions or self._curr: 8647 return self._parse_as_command(start) 8648 8649 return self.expression(exp.Declare, expressions=expressions)
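The parser above is usually driven through sqlglot's top-level helpers rather than instantiated by hand. A minimal usage sketch (illustrative, not part of the module source):

    from sqlglot import parse_one

    # tokenize and parse a single statement into an expression tree, then re-render it
    tree = parse_one("SELECT a FROM t WHERE b > 1", read="duckdb")
    print(tree.sql(dialect="duckdb"))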
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
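As a quick illustration of the pairing logic above (the literals are invented for the example), the builder splits an even-length argument list into alternating keys and values:

from sqlglot import exp
from sqlglot.parser import build_var_map

node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
)

# Keys and values are collected into two parallel ARRAY expressions.
print(type(node).__name__)                                   # VarMap
print([key.this for key in node.args["keys"].expressions])   # ['a', 'b']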
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
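A small sketch of the paths above (inputs are hypothetical): which operand is treated as the base depends on the dialect's LOG_BASE_FIRST flag, and the single-argument case falls back to Ln or Log via LOG_DEFAULTS_TO_LN:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_logarithm

# Two arguments: base and expression, possibly swapped by the dialect.
log = build_logarithm([exp.Literal.number(2), exp.column("x")], Dialect())

# One argument: plain Log, or Ln when the dialect's parser sets LOG_DEFAULTS_TO_LN.
ln_or_log = build_logarithm([exp.column("x")], Dialect())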
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
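For illustration (the column and path are hypothetical), the factory above builds a JSON extraction whose second argument is first converted into a structured path by dialect.to_json_path:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_extract_json_with_path

builder = build_extract_json_with_path(exp.JSONExtract)
node = builder([exp.column("doc"), exp.Literal.string("$.a.b")], Dialect())

# node is an exp.JSONExtract; its "expression" arg holds the parsed JSON path
# rather than the raw string literal.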
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
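A toy example of the wrapping described in the comment above (names are made up): parenthesizing binary operands preserves grouping when MOD is rendered with the % operator:

from sqlglot import exp
from sqlglot.parser import build_mod

mod = build_mod(
    [exp.Add(this=exp.column("a"), expression=exp.Literal.number(1)), exp.Literal.number(7)]
)

# The Add operand is wrapped in Paren, so the default rendering keeps the grouping,
# e.g. (a + 1) % 7 rather than a + 1 % 7.
print(mod.sql())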
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
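A brief sketch of the two-argument path above (the zone names and column are hypothetical): when only a target zone and a timestamp are supplied, an optional default source zone can be filled in:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("Europe/Berlin"), exp.column("created_at")],
    default_source_tz="UTC",
)

# source_tz comes from the default, target_tz and timestamp from the argument list;
# with three arguments the builder defers to exp.ConvertTimezone.from_arg_list.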
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 
TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEQUENCE, 456 TokenType.SINK, 457 TokenType.SOURCE, 458 TokenType.STAGE, 459 TokenType.STORAGE_INTEGRATION, 460 TokenType.STREAMLIT, 461 TokenType.TABLE, 462 TokenType.TAG, 463 TokenType.VIEW, 464 TokenType.WAREHOUSE, 465 } 466 467 CREATABLES = { 468 TokenType.COLUMN, 469 TokenType.CONSTRAINT, 470 TokenType.FOREIGN_KEY, 471 TokenType.FUNCTION, 472 TokenType.INDEX, 473 TokenType.PROCEDURE, 474 *DB_CREATABLES, 475 } 476 477 ALTERABLES = { 478 TokenType.INDEX, 479 TokenType.TABLE, 480 TokenType.VIEW, 481 } 482 483 # Tokens that can represent identifiers 484 ID_VAR_TOKENS = { 485 TokenType.ALL, 486 TokenType.ATTACH, 487 TokenType.VAR, 488 TokenType.ANTI, 489 TokenType.APPLY, 490 TokenType.ASC, 491 TokenType.ASOF, 492 TokenType.AUTO_INCREMENT, 493 TokenType.BEGIN, 494 TokenType.BPCHAR, 495 TokenType.CACHE, 496 TokenType.CASE, 497 TokenType.COLLATE, 498 TokenType.COMMAND, 499 TokenType.COMMENT, 500 TokenType.COMMIT, 501 TokenType.CONSTRAINT, 502 TokenType.COPY, 503 TokenType.CUBE, 504 TokenType.CURRENT_SCHEMA, 505 TokenType.DEFAULT, 506 TokenType.DELETE, 507 TokenType.DESC, 508 TokenType.DESCRIBE, 509 TokenType.DETACH, 510 TokenType.DICTIONARY, 511 TokenType.DIV, 512 TokenType.END, 513 TokenType.EXECUTE, 514 TokenType.EXPORT, 515 TokenType.ESCAPE, 516 TokenType.FALSE, 517 TokenType.FIRST, 518 TokenType.FILTER, 519 TokenType.FINAL, 520 TokenType.FORMAT, 521 TokenType.FULL, 522 TokenType.GET, 523 TokenType.IDENTIFIER, 524 TokenType.IS, 525 TokenType.ISNULL, 526 TokenType.INTERVAL, 527 TokenType.KEEP, 528 TokenType.KILL, 529 TokenType.LEFT, 530 TokenType.LIMIT, 531 TokenType.LOAD, 532 TokenType.MERGE, 533 TokenType.NATURAL, 534 TokenType.NEXT, 535 TokenType.OFFSET, 536 TokenType.OPERATOR, 537 TokenType.ORDINALITY, 538 TokenType.OVERLAPS, 539 TokenType.OVERWRITE, 540 TokenType.PARTITION, 541 TokenType.PERCENT, 542 TokenType.PIVOT, 543 TokenType.PRAGMA, 544 TokenType.PUT, 545 TokenType.RANGE, 546 TokenType.RECURSIVE, 547 TokenType.REFERENCES, 548 TokenType.REFRESH, 549 TokenType.RENAME, 550 TokenType.REPLACE, 551 TokenType.RIGHT, 552 TokenType.ROLLUP, 553 TokenType.ROW, 554 TokenType.ROWS, 555 TokenType.SEMI, 556 TokenType.SET, 557 TokenType.SETTINGS, 558 TokenType.SHOW, 559 TokenType.TEMPORARY, 560 TokenType.TOP, 561 TokenType.TRUE, 562 TokenType.TRUNCATE, 563 TokenType.UNIQUE, 564 TokenType.UNNEST, 565 TokenType.UNPIVOT, 
566 TokenType.UPDATE, 567 TokenType.USE, 568 TokenType.VOLATILE, 569 TokenType.WINDOW, 570 *CREATABLES, 571 *SUBQUERY_PREDICATES, 572 *TYPE_TOKENS, 573 *NO_PAREN_FUNCTIONS, 574 } 575 ID_VAR_TOKENS.remove(TokenType.UNION) 576 577 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 578 TokenType.ANTI, 579 TokenType.APPLY, 580 TokenType.ASOF, 581 TokenType.FULL, 582 TokenType.LEFT, 583 TokenType.LOCK, 584 TokenType.NATURAL, 585 TokenType.RIGHT, 586 TokenType.SEMI, 587 TokenType.WINDOW, 588 } 589 590 ALIAS_TOKENS = ID_VAR_TOKENS 591 592 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 593 594 ARRAY_CONSTRUCTORS = { 595 "ARRAY": exp.Array, 596 "LIST": exp.List, 597 } 598 599 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 600 601 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 602 603 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 604 605 FUNC_TOKENS = { 606 TokenType.COLLATE, 607 TokenType.COMMAND, 608 TokenType.CURRENT_DATE, 609 TokenType.CURRENT_DATETIME, 610 TokenType.CURRENT_SCHEMA, 611 TokenType.CURRENT_TIMESTAMP, 612 TokenType.CURRENT_TIME, 613 TokenType.CURRENT_USER, 614 TokenType.FILTER, 615 TokenType.FIRST, 616 TokenType.FORMAT, 617 TokenType.GET, 618 TokenType.GLOB, 619 TokenType.IDENTIFIER, 620 TokenType.INDEX, 621 TokenType.ISNULL, 622 TokenType.ILIKE, 623 TokenType.INSERT, 624 TokenType.LIKE, 625 TokenType.MERGE, 626 TokenType.NEXT, 627 TokenType.OFFSET, 628 TokenType.PRIMARY_KEY, 629 TokenType.RANGE, 630 TokenType.REPLACE, 631 TokenType.RLIKE, 632 TokenType.ROW, 633 TokenType.UNNEST, 634 TokenType.VAR, 635 TokenType.LEFT, 636 TokenType.RIGHT, 637 TokenType.SEQUENCE, 638 TokenType.DATE, 639 TokenType.DATETIME, 640 TokenType.TABLE, 641 TokenType.TIMESTAMP, 642 TokenType.TIMESTAMPTZ, 643 TokenType.TRUNCATE, 644 TokenType.WINDOW, 645 TokenType.XOR, 646 *TYPE_TOKENS, 647 *SUBQUERY_PREDICATES, 648 } 649 650 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.AND: exp.And, 652 } 653 654 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.COLON_EQ: exp.PropertyEQ, 656 } 657 658 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.OR: exp.Or, 660 } 661 662 EQUALITY = { 663 TokenType.EQ: exp.EQ, 664 TokenType.NEQ: exp.NEQ, 665 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 666 } 667 668 COMPARISON = { 669 TokenType.GT: exp.GT, 670 TokenType.GTE: exp.GTE, 671 TokenType.LT: exp.LT, 672 TokenType.LTE: exp.LTE, 673 } 674 675 BITWISE = { 676 TokenType.AMP: exp.BitwiseAnd, 677 TokenType.CARET: exp.BitwiseXor, 678 TokenType.PIPE: exp.BitwiseOr, 679 } 680 681 TERM = { 682 TokenType.DASH: exp.Sub, 683 TokenType.PLUS: exp.Add, 684 TokenType.MOD: exp.Mod, 685 TokenType.COLLATE: exp.Collate, 686 } 687 688 FACTOR = { 689 TokenType.DIV: exp.IntDiv, 690 TokenType.LR_ARROW: exp.Distance, 691 TokenType.SLASH: exp.Div, 692 TokenType.STAR: exp.Mul, 693 } 694 695 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 696 697 TIMES = { 698 TokenType.TIME, 699 TokenType.TIMETZ, 700 } 701 702 TIMESTAMPS = { 703 TokenType.TIMESTAMP, 704 TokenType.TIMESTAMPNTZ, 705 TokenType.TIMESTAMPTZ, 706 TokenType.TIMESTAMPLTZ, 707 *TIMES, 708 } 709 710 SET_OPERATIONS = { 711 TokenType.UNION, 712 TokenType.INTERSECT, 713 TokenType.EXCEPT, 714 } 715 716 JOIN_METHODS = { 717 TokenType.ASOF, 718 TokenType.NATURAL, 719 TokenType.POSITIONAL, 720 } 721 722 JOIN_SIDES = { 723 TokenType.LEFT, 724 TokenType.RIGHT, 725 TokenType.FULL, 726 } 727 728 JOIN_KINDS = { 729 TokenType.ANTI, 730 TokenType.CROSS, 731 TokenType.INNER, 732 TokenType.OUTER, 733 TokenType.SEMI, 734 
TokenType.STRAIGHT_JOIN, 735 } 736 737 JOIN_HINTS: t.Set[str] = set() 738 739 LAMBDAS = { 740 TokenType.ARROW: lambda self, expressions: self.expression( 741 exp.Lambda, 742 this=self._replace_lambda( 743 self._parse_assignment(), 744 expressions, 745 ), 746 expressions=expressions, 747 ), 748 TokenType.FARROW: lambda self, expressions: self.expression( 749 exp.Kwarg, 750 this=exp.var(expressions[0].name), 751 expression=self._parse_assignment(), 752 ), 753 } 754 755 COLUMN_OPERATORS = { 756 TokenType.DOT: None, 757 TokenType.DOTCOLON: lambda self, this, to: self.expression( 758 exp.JSONCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.DCOLON: lambda self, this, to: self.expression( 763 exp.Cast if self.STRICT_CAST else exp.TryCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 EXPRESSION_PARSERS = { 797 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 798 exp.Column: lambda self: self._parse_column(), 799 exp.Condition: lambda self: self._parse_assignment(), 800 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 801 exp.Expression: lambda self: self._parse_expression(), 802 exp.From: lambda self: self._parse_from(joins=True), 803 exp.Group: lambda self: self._parse_group(), 804 exp.Having: lambda self: self._parse_having(), 805 exp.Hint: lambda self: self._parse_hint_body(), 806 exp.Identifier: lambda self: self._parse_id_var(), 807 exp.Join: lambda self: self._parse_join(), 808 exp.Lambda: lambda self: self._parse_lambda(), 809 exp.Lateral: lambda self: self._parse_lateral(), 810 exp.Limit: lambda self: self._parse_limit(), 811 exp.Offset: lambda self: self._parse_offset(), 812 exp.Order: lambda self: self._parse_order(), 813 exp.Ordered: lambda self: self._parse_ordered(), 814 exp.Properties: lambda self: self._parse_properties(), 815 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 816 exp.Qualify: lambda self: self._parse_qualify(), 817 exp.Returning: lambda self: self._parse_returning(), 818 exp.Select: lambda self: self._parse_select(), 819 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 820 exp.Table: lambda self: self._parse_table_parts(), 821 exp.TableAlias: lambda self: self._parse_table_alias(), 822 exp.Tuple: lambda self: self._parse_value(values=False), 823 exp.Whens: lambda self: self._parse_when_matched(), 824 exp.Where: lambda self: self._parse_where(), 825 exp.Window: lambda self: self._parse_named_window(), 826 exp.With: lambda self: self._parse_with(), 827 "JOIN_TYPE": lambda self: self._parse_join_parts(), 828 } 829 830 STATEMENT_PARSERS = { 831 TokenType.ALTER: lambda self: self._parse_alter(), 832 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 833 TokenType.BEGIN: lambda self: self._parse_transaction(), 834 TokenType.CACHE: lambda self: self._parse_cache(), 835 TokenType.COMMENT: lambda self: self._parse_comment(), 836 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 837 TokenType.COPY: lambda self: self._parse_copy(), 838 TokenType.CREATE: lambda self: self._parse_create(), 839 TokenType.DELETE: lambda self: self._parse_delete(), 840 TokenType.DESC: lambda self: self._parse_describe(), 841 TokenType.DESCRIBE: lambda self: self._parse_describe(), 842 TokenType.DROP: lambda self: self._parse_drop(), 843 TokenType.GRANT: lambda self: self._parse_grant(), 844 TokenType.INSERT: lambda self: self._parse_insert(), 845 TokenType.KILL: lambda self: self._parse_kill(), 846 TokenType.LOAD: lambda self: self._parse_load(), 847 TokenType.MERGE: lambda self: self._parse_merge(), 848 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 849 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 850 TokenType.REFRESH: lambda self: self._parse_refresh(), 851 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 852 TokenType.SET: lambda self: self._parse_set(), 853 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 854 TokenType.UNCACHE: lambda self: self._parse_uncache(), 855 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 856 TokenType.UPDATE: lambda self: self._parse_update(), 857 TokenType.USE: lambda self: self._parse_use(), 858 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 859 } 860 861 UNARY_PARSERS = { 862 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 863 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 864 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 865 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 866 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 867 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 868 } 869 870 STRING_PARSERS = { 871 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 872 exp.RawString, this=token.text 873 ), 874 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 875 exp.National, this=token.text 876 ), 877 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 878 TokenType.STRING: lambda self, token: self.expression( 879 exp.Literal, this=token.text, is_string=True 880 ), 881 TokenType.UNICODE_STRING: lambda self, token: self.expression( 882 exp.UnicodeString, 883 this=token.text, 884 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 885 ), 886 } 887 888 NUMERIC_PARSERS = { 889 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 890 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 891 TokenType.HEX_STRING: lambda self, token: self.expression( 892 exp.HexString, 893 this=token.text, 894 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 895 ), 896 TokenType.NUMBER: lambda self, token: self.expression( 897 exp.Literal, this=token.text, is_string=False 898 ), 899 } 900 901 PRIMARY_PARSERS = { 902 **STRING_PARSERS, 903 **NUMERIC_PARSERS, 904 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 905 TokenType.NULL: lambda self, _: self.expression(exp.Null), 906 
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 907 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 908 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 909 TokenType.STAR: lambda self, _: self._parse_star_ops(), 910 } 911 912 PLACEHOLDER_PARSERS = { 913 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 914 TokenType.PARAMETER: lambda self: self._parse_parameter(), 915 TokenType.COLON: lambda self: ( 916 self.expression(exp.Placeholder, this=self._prev.text) 917 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 918 else None 919 ), 920 } 921 922 RANGE_PARSERS = { 923 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 924 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 925 TokenType.GLOB: binary_range_parser(exp.Glob), 926 TokenType.ILIKE: binary_range_parser(exp.ILike), 927 TokenType.IN: lambda self, this: self._parse_in(this), 928 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 929 TokenType.IS: lambda self, this: self._parse_is(this), 930 TokenType.LIKE: binary_range_parser(exp.Like), 931 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 932 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 933 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 934 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 935 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 936 } 937 938 PIPE_SYNTAX_TRANSFORM_PARSERS = { 939 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 940 "AS": lambda self, query: self._build_pipe_cte( 941 query, [exp.Star()], self._parse_table_alias() 942 ), 943 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 944 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 945 "ORDER BY": lambda self, query: query.order_by( 946 self._parse_order(), append=False, copy=False 947 ), 948 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 949 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 950 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 951 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 952 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 953 } 954 955 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 956 "ALLOWED_VALUES": lambda self: self.expression( 957 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 958 ), 959 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 960 "AUTO": lambda self: self._parse_auto_property(), 961 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 962 "BACKUP": lambda self: self.expression( 963 exp.BackupProperty, this=self._parse_var(any_token=True) 964 ), 965 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 966 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 968 "CHECKSUM": lambda self: self._parse_checksum(), 969 "CLUSTER BY": lambda self: self._parse_cluster(), 970 "CLUSTERED": lambda self: self._parse_clustered_by(), 971 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 972 exp.CollateProperty, **kwargs 973 ), 974 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 975 "CONTAINS": lambda self: self._parse_contains_property(), 976 "COPY": 
lambda self: self._parse_copy_property(), 977 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 978 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 979 "DEFINER": lambda self: self._parse_definer(), 980 "DETERMINISTIC": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 982 ), 983 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 984 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 985 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 986 "DISTKEY": lambda self: self._parse_distkey(), 987 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 988 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 989 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 990 "ENVIRONMENT": lambda self: self.expression( 991 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 992 ), 993 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 994 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 995 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 996 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 997 "FREESPACE": lambda self: self._parse_freespace(), 998 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 999 "HEAP": lambda self: self.expression(exp.HeapProperty), 1000 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1001 "IMMUTABLE": lambda self: self.expression( 1002 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1003 ), 1004 "INHERITS": lambda self: self.expression( 1005 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1006 ), 1007 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1008 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1009 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1010 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1011 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1014 "LOCK": lambda self: self._parse_locking(), 1015 "LOCKING": lambda self: self._parse_locking(), 1016 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1017 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1018 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1019 "MODIFIES": lambda self: self._parse_modifies_property(), 1020 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1021 "NO": lambda self: self._parse_no_property(), 1022 "ON": lambda self: self._parse_on_property(), 1023 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1024 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1025 "PARTITION": lambda self: self._parse_partitioned_of(), 1026 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1028 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1029 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1030 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1031 "READS": lambda self: self._parse_reads_property(), 1032 
"REMOTE": lambda self: self._parse_remote_with_connection(), 1033 "RETURNS": lambda self: self._parse_returns(), 1034 "STRICT": lambda self: self.expression(exp.StrictProperty), 1035 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1036 "ROW": lambda self: self._parse_row(), 1037 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1038 "SAMPLE": lambda self: self.expression( 1039 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1040 ), 1041 "SECURE": lambda self: self.expression(exp.SecureProperty), 1042 "SECURITY": lambda self: self._parse_security(), 1043 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1044 "SETTINGS": lambda self: self._parse_settings_property(), 1045 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1046 "SORTKEY": lambda self: self._parse_sortkey(), 1047 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1048 "STABLE": lambda self: self.expression( 1049 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1050 ), 1051 "STORED": lambda self: self._parse_stored(), 1052 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1053 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1054 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1055 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1056 "TO": lambda self: self._parse_to_table(), 1057 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1058 "TRANSFORM": lambda self: self.expression( 1059 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1060 ), 1061 "TTL": lambda self: self._parse_ttl(), 1062 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1063 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1064 "VOLATILE": lambda self: self._parse_volatile_property(), 1065 "WITH": lambda self: self._parse_with_property(), 1066 } 1067 1068 CONSTRAINT_PARSERS = { 1069 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1070 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1071 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1072 "CHARACTER SET": lambda self: self.expression( 1073 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1074 ), 1075 "CHECK": lambda self: self.expression( 1076 exp.CheckColumnConstraint, 1077 this=self._parse_wrapped(self._parse_assignment), 1078 enforced=self._match_text_seq("ENFORCED"), 1079 ), 1080 "COLLATE": lambda self: self.expression( 1081 exp.CollateColumnConstraint, 1082 this=self._parse_identifier() or self._parse_column(), 1083 ), 1084 "COMMENT": lambda self: self.expression( 1085 exp.CommentColumnConstraint, this=self._parse_string() 1086 ), 1087 "COMPRESS": lambda self: self._parse_compress(), 1088 "CLUSTERED": lambda self: self.expression( 1089 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1090 ), 1091 "NONCLUSTERED": lambda self: self.expression( 1092 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1093 ), 1094 "DEFAULT": lambda self: self.expression( 1095 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1096 ), 1097 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1098 "EPHEMERAL": lambda self: self.expression( 1099 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 
"EXCLUDE": lambda self: self.expression( 1102 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1103 ), 1104 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1105 "FORMAT": lambda self: self.expression( 1106 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1107 ), 1108 "GENERATED": lambda self: self._parse_generated_as_identity(), 1109 "IDENTITY": lambda self: self._parse_auto_increment(), 1110 "INLINE": lambda self: self._parse_inline(), 1111 "LIKE": lambda self: self._parse_create_like(), 1112 "NOT": lambda self: self._parse_not_constraint(), 1113 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1114 "ON": lambda self: ( 1115 self._match(TokenType.UPDATE) 1116 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1117 ) 1118 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1119 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1120 "PERIOD": lambda self: self._parse_period_for_system_time(), 1121 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1122 "REFERENCES": lambda self: self._parse_references(match=False), 1123 "TITLE": lambda self: self.expression( 1124 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1125 ), 1126 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1127 "UNIQUE": lambda self: self._parse_unique(), 1128 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1129 "WATERMARK": lambda self: self.expression( 1130 exp.WatermarkColumnConstraint, 1131 this=self._match(TokenType.FOR) and self._parse_column(), 1132 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1133 ), 1134 "WITH": lambda self: self.expression( 1135 exp.Properties, expressions=self._parse_wrapped_properties() 1136 ), 1137 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1139 } 1140 1141 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1142 klass = ( 1143 exp.PartitionedByBucket 1144 if self._prev.text.upper() == "BUCKET" 1145 else exp.PartitionByTruncate 1146 ) 1147 1148 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1149 this, expression = seq_get(args, 0), seq_get(args, 1) 1150 1151 if isinstance(this, exp.Literal): 1152 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1153 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1154 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1155 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1156 # 1157 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1158 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1159 this, expression = expression, this 1160 1161 return self.expression(klass, this=this, expression=expression) 1162 1163 ALTER_PARSERS = { 1164 "ADD": lambda self: self._parse_alter_table_add(), 1165 "AS": lambda self: self._parse_select(), 1166 "ALTER": lambda self: self._parse_alter_table_alter(), 1167 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1168 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 1169 "DROP": lambda self: self._parse_alter_table_drop(), 1170 "RENAME": lambda self: self._parse_alter_table_rename(), 1171 "SET": lambda self: self._parse_alter_table_set(), 1172 "SWAP": lambda self: self.expression( 1173 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1174 ), 1175 } 1176 1177 ALTER_ALTER_PARSERS = { 1178 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1179 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1180 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1181 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1182 } 1183 1184 SCHEMA_UNNAMED_CONSTRAINTS = { 1185 "CHECK", 1186 "EXCLUDE", 1187 "FOREIGN KEY", 1188 "LIKE", 1189 "PERIOD", 1190 "PRIMARY KEY", 1191 "UNIQUE", 1192 "WATERMARK", 1193 "BUCKET", 1194 "TRUNCATE", 1195 } 1196 1197 NO_PAREN_FUNCTION_PARSERS = { 1198 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1199 "CASE": lambda self: self._parse_case(), 1200 "CONNECT_BY_ROOT": lambda self: self.expression( 1201 exp.ConnectByRoot, this=self._parse_column() 1202 ), 1203 "IF": lambda self: self._parse_if(), 1204 } 1205 1206 INVALID_FUNC_NAME_TOKENS = { 1207 TokenType.IDENTIFIER, 1208 TokenType.STRING, 1209 } 1210 1211 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1212 1213 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1214 1215 FUNCTION_PARSERS = { 1216 **{ 1217 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1218 }, 1219 **{ 1220 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1221 }, 1222 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1223 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1224 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1225 "DECODE": lambda self: self._parse_decode(), 1226 "EXTRACT": lambda self: self._parse_extract(), 1227 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1228 "GAP_FILL": lambda self: self._parse_gap_fill(), 1229 "JSON_OBJECT": lambda self: self._parse_json_object(), 1230 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1231 "JSON_TABLE": lambda self: self._parse_json_table(), 1232 "MATCH": lambda self: self._parse_match_against(), 1233 "NORMALIZE": lambda self: self._parse_normalize(), 1234 "OPENJSON": lambda self: self._parse_open_json(), 1235 "OVERLAY": lambda self: self._parse_overlay(), 1236 "POSITION": lambda self: self._parse_position(), 1237 "PREDICT": lambda self: self._parse_predict(), 1238 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1239 "STRING_AGG": lambda self: self._parse_string_agg(), 1240 "SUBSTRING": lambda self: self._parse_substring(), 1241 "TRIM": lambda self: self._parse_trim(), 1242 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1243 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1244 "XMLELEMENT": lambda self: self.expression( 1245 exp.XMLElement, 1246 this=self._match_text_seq("NAME") and self._parse_id_var(), 1247 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1248 ), 1249 "XMLTABLE": lambda self: self._parse_xml_table(), 1250 } 1251 1252 QUERY_MODIFIER_PARSERS = { 1253 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1254 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1255 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1256 TokenType.GROUP_BY: lambda self: 
("group", self._parse_group()), 1257 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1258 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1259 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1260 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1261 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1262 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1263 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1264 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1265 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1266 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1268 TokenType.CLUSTER_BY: lambda self: ( 1269 "cluster", 1270 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1271 ), 1272 TokenType.DISTRIBUTE_BY: lambda self: ( 1273 "distribute", 1274 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1275 ), 1276 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1277 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1278 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1279 } 1280 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1281 1282 SET_PARSERS = { 1283 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1284 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1285 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1286 "TRANSACTION": lambda self: self._parse_set_transaction(), 1287 } 1288 1289 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1290 1291 TYPE_LITERAL_PARSERS = { 1292 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1293 } 1294 1295 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1296 1297 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1298 1299 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1300 1301 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1302 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1303 "ISOLATION": ( 1304 ("LEVEL", "REPEATABLE", "READ"), 1305 ("LEVEL", "READ", "COMMITTED"), 1306 ("LEVEL", "READ", "UNCOMITTED"), 1307 ("LEVEL", "SERIALIZABLE"), 1308 ), 1309 "READ": ("WRITE", "ONLY"), 1310 } 1311 1312 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1313 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1314 ) 1315 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1316 1317 CREATE_SEQUENCE: OPTIONS_TYPE = { 1318 "SCALE": ("EXTEND", "NOEXTEND"), 1319 "SHARD": ("EXTEND", "NOEXTEND"), 1320 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1321 **dict.fromkeys( 1322 ( 1323 "SESSION", 1324 "GLOBAL", 1325 "KEEP", 1326 "NOKEEP", 1327 "ORDER", 1328 "NOORDER", 1329 "NOCACHE", 1330 "CYCLE", 1331 "NOCYCLE", 1332 "NOMINVALUE", 1333 "NOMAXVALUE", 1334 "NOSCALE", 1335 "NOSHARD", 1336 ), 1337 tuple(), 1338 ), 1339 } 1340 1341 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1342 1343 USABLES: OPTIONS_TYPE = dict.fromkeys( 1344 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1345 ) 1346 1347 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1348 1349 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1350 "TYPE": ("EVOLUTION",), 
1351 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1352 } 1353 1354 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1355 1356 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1357 1358 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1359 "NOT": ("ENFORCED",), 1360 "MATCH": ( 1361 "FULL", 1362 "PARTIAL", 1363 "SIMPLE", 1364 ), 1365 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1366 "USING": ( 1367 "BTREE", 1368 "HASH", 1369 ), 1370 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1371 } 1372 1373 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1374 "NO": ("OTHERS",), 1375 "CURRENT": ("ROW",), 1376 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1377 } 1378 1379 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1380 1381 CLONE_KEYWORDS = {"CLONE", "COPY"} 1382 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1383 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1384 1385 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1386 1387 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1388 1389 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1390 1391 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1392 1393 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1394 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1395 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1396 1397 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1398 1399 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1400 1401 ADD_CONSTRAINT_TOKENS = { 1402 TokenType.CONSTRAINT, 1403 TokenType.FOREIGN_KEY, 1404 TokenType.INDEX, 1405 TokenType.KEY, 1406 TokenType.PRIMARY_KEY, 1407 TokenType.UNIQUE, 1408 } 1409 1410 DISTINCT_TOKENS = {TokenType.DISTINCT} 1411 1412 NULL_TOKENS = {TokenType.NULL} 1413 1414 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1415 1416 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1417 1418 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1419 1420 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1421 1422 ODBC_DATETIME_LITERALS = { 1423 "d": exp.Date, 1424 "t": exp.Time, 1425 "ts": exp.Timestamp, 1426 } 1427 1428 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1429 1430 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1431 1432 # The style options for the DESCRIBE statement 1433 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1434 1435 # The style options for the ANALYZE statement 1436 ANALYZE_STYLES = { 1437 "BUFFER_USAGE_LIMIT", 1438 "FULL", 1439 "LOCAL", 1440 "NO_WRITE_TO_BINLOG", 1441 "SAMPLE", 1442 "SKIP_LOCKED", 1443 "VERBOSE", 1444 } 1445 1446 ANALYZE_EXPRESSION_PARSERS = { 1447 "ALL": lambda self: self._parse_analyze_columns(), 1448 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1449 "DELETE": lambda self: self._parse_analyze_delete(), 1450 "DROP": lambda self: self._parse_analyze_histogram(), 1451 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1452 "LIST": lambda self: self._parse_analyze_list(), 1453 "PREDICATE": lambda self: self._parse_analyze_columns(), 1454 "UPDATE": lambda self: self._parse_analyze_histogram(), 1455 "VALIDATE": lambda self: self._parse_analyze_validate(), 1456 } 1457 1458 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1459 1460 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, 
TokenType.OFFSET) 1461 1462 OPERATION_MODIFIERS: t.Set[str] = set() 1463 1464 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1465 1466 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1467 1468 STRICT_CAST = True 1469 1470 PREFIXED_PIVOT_COLUMNS = False 1471 IDENTIFY_PIVOT_STRINGS = False 1472 1473 LOG_DEFAULTS_TO_LN = False 1474 1475 # Whether the table sample clause expects CSV syntax 1476 TABLESAMPLE_CSV = False 1477 1478 # The default method used for table sampling 1479 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1480 1481 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1482 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1483 1484 # Whether the TRIM function expects the characters to trim as its first argument 1485 TRIM_PATTERN_FIRST = False 1486 1487 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1488 STRING_ALIASES = False 1489 1490 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1491 MODIFIERS_ATTACHED_TO_SET_OP = True 1492 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1493 1494 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1495 NO_PAREN_IF_COMMANDS = True 1496 1497 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1498 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1499 1500 # Whether the `:` operator is used to extract a value from a VARIANT column 1501 COLON_IS_VARIANT_EXTRACT = False 1502 1503 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1504 # If this is True and '(' is not found, the keyword will be treated as an identifier 1505 VALUES_FOLLOWED_BY_PAREN = True 1506 1507 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1508 SUPPORTS_IMPLICIT_UNNEST = False 1509 1510 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1511 INTERVAL_SPANS = True 1512 1513 # Whether a PARTITION clause can follow a table reference 1514 SUPPORTS_PARTITION_SELECTION = False 1515 1516 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1517 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1518 1519 # Whether the 'AS' keyword is optional in the CTE definition syntax 1520 OPTIONAL_ALIAS_TOKEN_CTE = True 1521 1522 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1523 ALTER_RENAME_REQUIRES_COLUMN = True 1524 1525 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1526 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1527 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1528 # as BigQuery, where all joins have the same precedence. 1529 JOINS_HAVE_EQUAL_PRECEDENCE = False 1530 1531 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1532 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1533 1534 # Whether map literals support arbitrary expressions as keys. 1535 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1536 # When False, keys are typically restricted to identifiers. 
1537 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1538 1539 __slots__ = ( 1540 "error_level", 1541 "error_message_context", 1542 "max_errors", 1543 "dialect", 1544 "sql", 1545 "errors", 1546 "_tokens", 1547 "_index", 1548 "_curr", 1549 "_next", 1550 "_prev", 1551 "_prev_comments", 1552 "_pipe_cte_counter", 1553 ) 1554 1555 # Autofilled 1556 SHOW_TRIE: t.Dict = {} 1557 SET_TRIE: t.Dict = {} 1558 1559 def __init__( 1560 self, 1561 error_level: t.Optional[ErrorLevel] = None, 1562 error_message_context: int = 100, 1563 max_errors: int = 3, 1564 dialect: DialectType = None, 1565 ): 1566 from sqlglot.dialects import Dialect 1567 1568 self.error_level = error_level or ErrorLevel.IMMEDIATE 1569 self.error_message_context = error_message_context 1570 self.max_errors = max_errors 1571 self.dialect = Dialect.get_or_raise(dialect) 1572 self.reset() 1573 1574 def reset(self): 1575 self.sql = "" 1576 self.errors = [] 1577 self._tokens = [] 1578 self._index = 0 1579 self._curr = None 1580 self._next = None 1581 self._prev = None 1582 self._prev_comments = None 1583 self._pipe_cte_counter = 0 1584 1585 def parse( 1586 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1587 ) -> t.List[t.Optional[exp.Expression]]: 1588 """ 1589 Parses a list of tokens and returns a list of syntax trees, one tree 1590 per parsed SQL statement. 1591 1592 Args: 1593 raw_tokens: The list of tokens. 1594 sql: The original SQL string, used to produce helpful debug messages. 1595 1596 Returns: 1597 The list of the produced syntax trees. 1598 """ 1599 return self._parse( 1600 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1601 ) 1602 1603 def parse_into( 1604 self, 1605 expression_types: exp.IntoType, 1606 raw_tokens: t.List[Token], 1607 sql: t.Optional[str] = None, 1608 ) -> t.List[t.Optional[exp.Expression]]: 1609 """ 1610 Parses a list of tokens into a given Expression type. If a collection of Expression 1611 types is given instead, this method will try to parse the token list into each one 1612 of them, stopping at the first for which the parsing succeeds. 1613 1614 Args: 1615 expression_types: The expression type(s) to try and parse the token list into. 1616 raw_tokens: The list of tokens. 1617 sql: The original SQL string, used to produce helpful debug messages. 1618 1619 Returns: 1620 The target Expression. 
1621 """ 1622 errors = [] 1623 for expression_type in ensure_list(expression_types): 1624 parser = self.EXPRESSION_PARSERS.get(expression_type) 1625 if not parser: 1626 raise TypeError(f"No parser registered for {expression_type}") 1627 1628 try: 1629 return self._parse(parser, raw_tokens, sql) 1630 except ParseError as e: 1631 e.errors[0]["into_expression"] = expression_type 1632 errors.append(e) 1633 1634 raise ParseError( 1635 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1636 errors=merge_errors(errors), 1637 ) from errors[-1] 1638 1639 def _parse( 1640 self, 1641 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1642 raw_tokens: t.List[Token], 1643 sql: t.Optional[str] = None, 1644 ) -> t.List[t.Optional[exp.Expression]]: 1645 self.reset() 1646 self.sql = sql or "" 1647 1648 total = len(raw_tokens) 1649 chunks: t.List[t.List[Token]] = [[]] 1650 1651 for i, token in enumerate(raw_tokens): 1652 if token.token_type == TokenType.SEMICOLON: 1653 if token.comments: 1654 chunks.append([token]) 1655 1656 if i < total - 1: 1657 chunks.append([]) 1658 else: 1659 chunks[-1].append(token) 1660 1661 expressions = [] 1662 1663 for tokens in chunks: 1664 self._index = -1 1665 self._tokens = tokens 1666 self._advance() 1667 1668 expressions.append(parse_method(self)) 1669 1670 if self._index < len(self._tokens): 1671 self.raise_error("Invalid expression / Unexpected token") 1672 1673 self.check_errors() 1674 1675 return expressions 1676 1677 def check_errors(self) -> None: 1678 """Logs or raises any found errors, depending on the chosen error level setting.""" 1679 if self.error_level == ErrorLevel.WARN: 1680 for error in self.errors: 1681 logger.error(str(error)) 1682 elif self.error_level == ErrorLevel.RAISE and self.errors: 1683 raise ParseError( 1684 concat_messages(self.errors, self.max_errors), 1685 errors=merge_errors(self.errors), 1686 ) 1687 1688 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1689 """ 1690 Appends an error in the list of recorded errors or raises it, depending on the chosen 1691 error level setting. 1692 """ 1693 token = token or self._curr or self._prev or Token.string("") 1694 start = token.start 1695 end = token.end + 1 1696 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1697 highlight = self.sql[start:end] 1698 end_context = self.sql[end : end + self.error_message_context] 1699 1700 error = ParseError.new( 1701 f"{message}. Line {token.line}, Col: {token.col}.\n" 1702 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1703 description=message, 1704 line=token.line, 1705 col=token.col, 1706 start_context=start_context, 1707 highlight=highlight, 1708 end_context=end_context, 1709 ) 1710 1711 if self.error_level == ErrorLevel.IMMEDIATE: 1712 raise error 1713 1714 self.errors.append(error) 1715 1716 def expression( 1717 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1718 ) -> E: 1719 """ 1720 Creates a new, validated Expression. 1721 1722 Args: 1723 exp_class: The expression class to instantiate. 1724 comments: An optional list of comments to attach to the expression. 1725 kwargs: The arguments to set for the expression along with their respective values. 1726 1727 Returns: 1728 The target expression. 
1729 """ 1730 instance = exp_class(**kwargs) 1731 instance.add_comments(comments) if comments else self._add_comments(instance) 1732 return self.validate_expression(instance) 1733 1734 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1735 if expression and self._prev_comments: 1736 expression.add_comments(self._prev_comments) 1737 self._prev_comments = None 1738 1739 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1740 """ 1741 Validates an Expression, making sure that all its mandatory arguments are set. 1742 1743 Args: 1744 expression: The expression to validate. 1745 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1746 1747 Returns: 1748 The validated expression. 1749 """ 1750 if self.error_level != ErrorLevel.IGNORE: 1751 for error_message in expression.error_messages(args): 1752 self.raise_error(error_message) 1753 1754 return expression 1755 1756 def _find_sql(self, start: Token, end: Token) -> str: 1757 return self.sql[start.start : end.end + 1] 1758 1759 def _is_connected(self) -> bool: 1760 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1761 1762 def _advance(self, times: int = 1) -> None: 1763 self._index += times 1764 self._curr = seq_get(self._tokens, self._index) 1765 self._next = seq_get(self._tokens, self._index + 1) 1766 1767 if self._index > 0: 1768 self._prev = self._tokens[self._index - 1] 1769 self._prev_comments = self._prev.comments 1770 else: 1771 self._prev = None 1772 self._prev_comments = None 1773 1774 def _retreat(self, index: int) -> None: 1775 if index != self._index: 1776 self._advance(index - self._index) 1777 1778 def _warn_unsupported(self) -> None: 1779 if len(self._tokens) <= 1: 1780 return 1781 1782 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1783 # interested in emitting a warning for the one being currently processed. 1784 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1785 1786 logger.warning( 1787 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1788 ) 1789 1790 def _parse_command(self) -> exp.Command: 1791 self._warn_unsupported() 1792 return self.expression( 1793 exp.Command, 1794 comments=self._prev_comments, 1795 this=self._prev.text.upper(), 1796 expression=self._parse_string(), 1797 ) 1798 1799 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1800 """ 1801 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1802 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1803 solve this by setting & resetting the parser state accordingly. 1804 """ 1805 index = self._index 1806 error_level = self.error_level 1807 1808 self.error_level = ErrorLevel.IMMEDIATE 1809 try: 1810 this = parse_method() 1811 except ParseError: 1812 this = None 1813 finally: 1814 if not this or retreat: 1815 self._retreat(index) 1816 self.error_level = error_level 1817 1818 return this 1819 1820 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1821 start = self._prev 1822 exists = self._parse_exists() if allow_exists else None 1823 1824 self._match(TokenType.ON) 1825 1826 materialized = self._match_text_seq("MATERIALIZED") 1827 kind = self._match_set(self.CREATABLES) and self._prev 1828 if not kind: 1829 return self._parse_as_command(start) 1830 1831 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1832 this = self._parse_user_defined_function(kind=kind.token_type) 1833 elif kind.token_type == TokenType.TABLE: 1834 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1835 elif kind.token_type == TokenType.COLUMN: 1836 this = self._parse_column() 1837 else: 1838 this = self._parse_id_var() 1839 1840 self._match(TokenType.IS) 1841 1842 return self.expression( 1843 exp.Comment, 1844 this=this, 1845 kind=kind.text, 1846 expression=self._parse_string(), 1847 exists=exists, 1848 materialized=materialized, 1849 ) 1850 1851 def _parse_to_table( 1852 self, 1853 ) -> exp.ToTableProperty: 1854 table = self._parse_table_parts(schema=True) 1855 return self.expression(exp.ToTableProperty, this=table) 1856 1857 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1858 def _parse_ttl(self) -> exp.Expression: 1859 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1860 this = self._parse_bitwise() 1861 1862 if self._match_text_seq("DELETE"): 1863 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1864 if self._match_text_seq("RECOMPRESS"): 1865 return self.expression( 1866 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1867 ) 1868 if self._match_text_seq("TO", "DISK"): 1869 return self.expression( 1870 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1871 ) 1872 if self._match_text_seq("TO", "VOLUME"): 1873 return self.expression( 1874 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1875 ) 1876 1877 return this 1878 1879 expressions = self._parse_csv(_parse_ttl_action) 1880 where = self._parse_where() 1881 group = self._parse_group() 1882 1883 aggregates = None 1884 if group and self._match(TokenType.SET): 1885 aggregates = self._parse_csv(self._parse_set_item) 1886 1887 return self.expression( 1888 exp.MergeTreeTTL, 1889 expressions=expressions, 1890 where=where, 1891 group=group, 1892 aggregates=aggregates, 1893 ) 1894 1895 def _parse_statement(self) -> t.Optional[exp.Expression]: 1896 if self._curr is None: 1897 return None 1898 1899 if self._match_set(self.STATEMENT_PARSERS): 1900 comments = self._prev_comments 1901 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1902 stmt.add_comments(comments, prepend=True) 1903 return stmt 1904 1905 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1906 return self._parse_command() 1907 1908 expression = self._parse_expression() 1909 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1910 return
self._parse_query_modifiers(expression) 1911 1912 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1913 start = self._prev 1914 temporary = self._match(TokenType.TEMPORARY) 1915 materialized = self._match_text_seq("MATERIALIZED") 1916 1917 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1918 if not kind: 1919 return self._parse_as_command(start) 1920 1921 concurrently = self._match_text_seq("CONCURRENTLY") 1922 if_exists = exists or self._parse_exists() 1923 1924 if kind == "COLUMN": 1925 this = self._parse_column() 1926 else: 1927 this = self._parse_table_parts( 1928 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1929 ) 1930 1931 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1932 1933 if self._match(TokenType.L_PAREN, advance=False): 1934 expressions = self._parse_wrapped_csv(self._parse_types) 1935 else: 1936 expressions = None 1937 1938 return self.expression( 1939 exp.Drop, 1940 exists=if_exists, 1941 this=this, 1942 expressions=expressions, 1943 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1944 temporary=temporary, 1945 materialized=materialized, 1946 cascade=self._match_text_seq("CASCADE"), 1947 constraints=self._match_text_seq("CONSTRAINTS"), 1948 purge=self._match_text_seq("PURGE"), 1949 cluster=cluster, 1950 concurrently=concurrently, 1951 ) 1952 1953 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1954 return ( 1955 self._match_text_seq("IF") 1956 and (not not_ or self._match(TokenType.NOT)) 1957 and self._match(TokenType.EXISTS) 1958 ) 1959 1960 def _parse_create(self) -> exp.Create | exp.Command: 1961 # Note: this can't be None because we've matched a statement parser 1962 start = self._prev 1963 1964 replace = ( 1965 start.token_type == TokenType.REPLACE 1966 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1967 or self._match_pair(TokenType.OR, TokenType.ALTER) 1968 ) 1969 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1970 1971 unique = self._match(TokenType.UNIQUE) 1972 1973 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1974 clustered = True 1975 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1976 "COLUMNSTORE" 1977 ): 1978 clustered = False 1979 else: 1980 clustered = None 1981 1982 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1983 self._advance() 1984 1985 properties = None 1986 create_token = self._match_set(self.CREATABLES) and self._prev 1987 1988 if not create_token: 1989 # exp.Properties.Location.POST_CREATE 1990 properties = self._parse_properties() 1991 create_token = self._match_set(self.CREATABLES) and self._prev 1992 1993 if not properties or not create_token: 1994 return self._parse_as_command(start) 1995 1996 concurrently = self._match_text_seq("CONCURRENTLY") 1997 exists = self._parse_exists(not_=True) 1998 this = None 1999 expression: t.Optional[exp.Expression] = None 2000 indexes = None 2001 no_schema_binding = None 2002 begin = None 2003 end = None 2004 clone = None 2005 2006 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2007 nonlocal properties 2008 if properties and temp_props: 2009 properties.expressions.extend(temp_props.expressions) 2010 elif temp_props: 2011 properties = temp_props 2012 2013 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2014 this = self._parse_user_defined_function(kind=create_token.token_type) 2015 2016 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2017 extend_props(self._parse_properties()) 2018 2019 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2020 extend_props(self._parse_properties()) 2021 2022 if not expression: 2023 if self._match(TokenType.COMMAND): 2024 expression = self._parse_as_command(self._prev) 2025 else: 2026 begin = self._match(TokenType.BEGIN) 2027 return_ = self._match_text_seq("RETURN") 2028 2029 if self._match(TokenType.STRING, advance=False): 2030 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2031 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2032 expression = self._parse_string() 2033 extend_props(self._parse_properties()) 2034 else: 2035 expression = self._parse_user_defined_function_expression() 2036 2037 end = self._match_text_seq("END") 2038 2039 if return_: 2040 expression = self.expression(exp.Return, this=expression) 2041 elif create_token.token_type == TokenType.INDEX: 2042 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2043 if not self._match(TokenType.ON): 2044 index = self._parse_id_var() 2045 anonymous = False 2046 else: 2047 index = None 2048 anonymous = True 2049 2050 this = self._parse_index(index=index, anonymous=anonymous) 2051 elif create_token.token_type in self.DB_CREATABLES: 2052 table_parts = self._parse_table_parts( 2053 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2054 ) 2055 2056 # exp.Properties.Location.POST_NAME 2057 self._match(TokenType.COMMA) 2058 extend_props(self._parse_properties(before=True)) 2059 2060 this = self._parse_schema(this=table_parts) 2061 2062 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2063 extend_props(self._parse_properties()) 2064 2065 has_alias = self._match(TokenType.ALIAS) 2066 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2067 # exp.Properties.Location.POST_ALIAS 2068 extend_props(self._parse_properties()) 2069 2070 if create_token.token_type == TokenType.SEQUENCE: 2071 expression = self._parse_types() 2072 extend_props(self._parse_properties()) 2073 else: 2074 expression = self._parse_ddl_select() 2075 2076 # Some dialects also support using a table as an alias instead of a SELECT. 2077 # Here we fall back to this as an alternative.
2078 if not expression and has_alias: 2079 expression = self._try_parse(self._parse_table_parts) 2080 2081 if create_token.token_type == TokenType.TABLE: 2082 # exp.Properties.Location.POST_EXPRESSION 2083 extend_props(self._parse_properties()) 2084 2085 indexes = [] 2086 while True: 2087 index = self._parse_index() 2088 2089 # exp.Properties.Location.POST_INDEX 2090 extend_props(self._parse_properties()) 2091 if not index: 2092 break 2093 else: 2094 self._match(TokenType.COMMA) 2095 indexes.append(index) 2096 elif create_token.token_type == TokenType.VIEW: 2097 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2098 no_schema_binding = True 2099 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2100 extend_props(self._parse_properties()) 2101 2102 shallow = self._match_text_seq("SHALLOW") 2103 2104 if self._match_texts(self.CLONE_KEYWORDS): 2105 copy = self._prev.text.lower() == "copy" 2106 clone = self.expression( 2107 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2108 ) 2109 2110 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2111 return self._parse_as_command(start) 2112 2113 create_kind_text = create_token.text.upper() 2114 return self.expression( 2115 exp.Create, 2116 this=this, 2117 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2118 replace=replace, 2119 refresh=refresh, 2120 unique=unique, 2121 expression=expression, 2122 exists=exists, 2123 properties=properties, 2124 indexes=indexes, 2125 no_schema_binding=no_schema_binding, 2126 begin=begin, 2127 end=end, 2128 clone=clone, 2129 concurrently=concurrently, 2130 clustered=clustered, 2131 ) 2132 2133 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2134 seq = exp.SequenceProperties() 2135 2136 options = [] 2137 index = self._index 2138 2139 while self._curr: 2140 self._match(TokenType.COMMA) 2141 if self._match_text_seq("INCREMENT"): 2142 self._match_text_seq("BY") 2143 self._match_text_seq("=") 2144 seq.set("increment", self._parse_term()) 2145 elif self._match_text_seq("MINVALUE"): 2146 seq.set("minvalue", self._parse_term()) 2147 elif self._match_text_seq("MAXVALUE"): 2148 seq.set("maxvalue", self._parse_term()) 2149 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2150 self._match_text_seq("=") 2151 seq.set("start", self._parse_term()) 2152 elif self._match_text_seq("CACHE"): 2153 # T-SQL allows empty CACHE which is initialized dynamically 2154 seq.set("cache", self._parse_number() or True) 2155 elif self._match_text_seq("OWNED", "BY"): 2156 # "OWNED BY NONE" is the default 2157 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2158 else: 2159 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2160 if opt: 2161 options.append(opt) 2162 else: 2163 break 2164 2165 seq.set("options", options if options else None) 2166 return None if self._index == index else seq 2167 2168 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2169 # only used for teradata currently 2170 self._match(TokenType.COMMA) 2171 2172 kwargs = { 2173 "no": self._match_text_seq("NO"), 2174 "dual": self._match_text_seq("DUAL"), 2175 "before": self._match_text_seq("BEFORE"), 2176 "default": self._match_text_seq("DEFAULT"), 2177 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2178 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2179 "after": self._match_text_seq("AFTER"), 2180 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2181 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2182 } 2183 2184 if self._match_texts(self.PROPERTY_PARSERS): 2185 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2186 try: 2187 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2188 except TypeError: 2189 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2190 2191 return None 2192 2193 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2194 return self._parse_wrapped_csv(self._parse_property) 2195 2196 def _parse_property(self) -> t.Optional[exp.Expression]: 2197 if self._match_texts(self.PROPERTY_PARSERS): 2198 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2199 2200 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2201 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2202 2203 if self._match_text_seq("COMPOUND", "SORTKEY"): 2204 return self._parse_sortkey(compound=True) 2205 2206 if self._match_text_seq("SQL", "SECURITY"): 2207 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2208 2209 index = self._index 2210 key = self._parse_column() 2211 2212 if not self._match(TokenType.EQ): 2213 self._retreat(index) 2214 return self._parse_sequence_properties() 2215 2216 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2217 if isinstance(key, exp.Column): 2218 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2219 2220 value = self._parse_bitwise() or self._parse_var(any_token=True) 2221 2222 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2223 if isinstance(value, exp.Column): 2224 value = exp.var(value.name) 2225 2226 return self.expression(exp.Property, this=key, value=value) 2227 2228 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2229 if self._match_text_seq("BY"): 2230 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2231 2232 self._match(TokenType.ALIAS) 2233 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2234 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2235 2236 return self.expression( 2237 exp.FileFormatProperty, 2238 this=( 2239 self.expression( 2240 exp.InputOutputFormat, 2241 input_format=input_format, 2242 output_format=output_format, 2243 ) 2244 if input_format or output_format 2245 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2246 ), 2247 hive_format=True, 2248 ) 2249 2250 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2251 field = self._parse_field() 2252 if isinstance(field, exp.Identifier) and not field.quoted: 2253 field = exp.var(field) 2254 2255 return field 2256 2257 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2258 self._match(TokenType.EQ) 2259 self._match(TokenType.ALIAS) 2260 2261 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2262 2263 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2264 properties = [] 2265 while True: 2266 if before: 2267 prop = self._parse_property_before() 2268 else: 2269 prop = self._parse_property() 2270 if not prop: 2271 break 2272 for p in ensure_list(prop): 2273 properties.append(p) 2274 2275 if properties: 2276 return self.expression(exp.Properties, expressions=properties) 2277 
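# No property was parsed; return None so callers (e.g. extend_props in _parse_create) can detect that nothing was found.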
2278 return None 2279 2280 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2281 return self.expression( 2282 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2283 ) 2284 2285 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2286 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2287 security_specifier = self._prev.text.upper() 2288 return self.expression(exp.SecurityProperty, this=security_specifier) 2289 return None 2290 2291 def _parse_settings_property(self) -> exp.SettingsProperty: 2292 return self.expression( 2293 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2294 ) 2295 2296 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2297 if self._index >= 2: 2298 pre_volatile_token = self._tokens[self._index - 2] 2299 else: 2300 pre_volatile_token = None 2301 2302 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2303 return exp.VolatileProperty() 2304 2305 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2306 2307 def _parse_retention_period(self) -> exp.Var: 2308 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2309 number = self._parse_number() 2310 number_str = f"{number} " if number else "" 2311 unit = self._parse_var(any_token=True) 2312 return exp.var(f"{number_str}{unit}") 2313 2314 def _parse_system_versioning_property( 2315 self, with_: bool = False 2316 ) -> exp.WithSystemVersioningProperty: 2317 self._match(TokenType.EQ) 2318 prop = self.expression( 2319 exp.WithSystemVersioningProperty, 2320 **{ # type: ignore 2321 "on": True, 2322 "with": with_, 2323 }, 2324 ) 2325 2326 if self._match_text_seq("OFF"): 2327 prop.set("on", False) 2328 return prop 2329 2330 self._match(TokenType.ON) 2331 if self._match(TokenType.L_PAREN): 2332 while self._curr and not self._match(TokenType.R_PAREN): 2333 if self._match_text_seq("HISTORY_TABLE", "="): 2334 prop.set("this", self._parse_table_parts()) 2335 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2336 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2337 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2338 prop.set("retention_period", self._parse_retention_period()) 2339 2340 self._match(TokenType.COMMA) 2341 2342 return prop 2343 2344 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2345 self._match(TokenType.EQ) 2346 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2347 prop = self.expression(exp.DataDeletionProperty, on=on) 2348 2349 if self._match(TokenType.L_PAREN): 2350 while self._curr and not self._match(TokenType.R_PAREN): 2351 if self._match_text_seq("FILTER_COLUMN", "="): 2352 prop.set("filter_column", self._parse_column()) 2353 elif self._match_text_seq("RETENTION_PERIOD", "="): 2354 prop.set("retention_period", self._parse_retention_period()) 2355 2356 self._match(TokenType.COMMA) 2357 2358 return prop 2359 2360 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2361 kind = "HASH" 2362 expressions: t.Optional[t.List[exp.Expression]] = None 2363 if self._match_text_seq("BY", "HASH"): 2364 expressions = self._parse_wrapped_csv(self._parse_id_var) 2365 elif self._match_text_seq("BY", "RANDOM"): 2366 kind = "RANDOM" 2367 2368 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2369 buckets: t.Optional[exp.Expression] = None 2370 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2371 buckets = self._parse_number() 2372 2373 return self.expression( 2374 exp.DistributedByProperty, 2375 expressions=expressions, 2376 kind=kind, 2377 buckets=buckets, 2378 order=self._parse_order(), 2379 ) 2380 2381 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2382 self._match_text_seq("KEY") 2383 expressions = self._parse_wrapped_id_vars() 2384 return self.expression(expr_type, expressions=expressions) 2385 2386 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2387 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2388 prop = self._parse_system_versioning_property(with_=True) 2389 self._match_r_paren() 2390 return prop 2391 2392 if self._match(TokenType.L_PAREN, advance=False): 2393 return self._parse_wrapped_properties() 2394 2395 if self._match_text_seq("JOURNAL"): 2396 return self._parse_withjournaltable() 2397 2398 if self._match_texts(self.VIEW_ATTRIBUTES): 2399 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2400 2401 if self._match_text_seq("DATA"): 2402 return self._parse_withdata(no=False) 2403 elif self._match_text_seq("NO", "DATA"): 2404 return self._parse_withdata(no=True) 2405 2406 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2407 return self._parse_serde_properties(with_=True) 2408 2409 if self._match(TokenType.SCHEMA): 2410 return self.expression( 2411 exp.WithSchemaBindingProperty, 2412 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2413 ) 2414 2415 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2416 return self.expression( 2417 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2418 ) 2419 2420 if not self._next: 2421 return None 2422 2423 return self._parse_withisolatedloading() 2424 2425 def _parse_procedure_option(self) -> exp.Expression | None: 2426 if self._match_text_seq("EXECUTE", "AS"): 2427 return self.expression( 2428 exp.ExecuteAsProperty, 2429 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2430 or self._parse_string(), 2431 ) 2432 2433 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2434 2435 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2436 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2437 self._match(TokenType.EQ) 2438 2439 user = self._parse_id_var() 2440 self._match(TokenType.PARAMETER) 2441 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2442 2443 if not user or not host: 2444 return None 2445 2446 return exp.DefinerProperty(this=f"{user}@{host}") 2447 2448 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2449 self._match(TokenType.TABLE) 2450 self._match(TokenType.EQ) 2451 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2452 2453 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2454 return self.expression(exp.LogProperty, no=no) 2455 2456 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2457 return self.expression(exp.JournalProperty, **kwargs) 2458 2459 def _parse_checksum(self) -> exp.ChecksumProperty: 2460 self._match(TokenType.EQ) 2461 2462 on = None 2463 if self._match(TokenType.ON): 2464 on = True 2465 elif self._match_text_seq("OFF"): 2466 on = False 2467 2468 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2469 2470 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2471 return self.expression( 2472 
exp.Cluster, 2473 expressions=( 2474 self._parse_wrapped_csv(self._parse_ordered) 2475 if wrapped 2476 else self._parse_csv(self._parse_ordered) 2477 ), 2478 ) 2479 2480 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2481 self._match_text_seq("BY") 2482 2483 self._match_l_paren() 2484 expressions = self._parse_csv(self._parse_column) 2485 self._match_r_paren() 2486 2487 if self._match_text_seq("SORTED", "BY"): 2488 self._match_l_paren() 2489 sorted_by = self._parse_csv(self._parse_ordered) 2490 self._match_r_paren() 2491 else: 2492 sorted_by = None 2493 2494 self._match(TokenType.INTO) 2495 buckets = self._parse_number() 2496 self._match_text_seq("BUCKETS") 2497 2498 return self.expression( 2499 exp.ClusteredByProperty, 2500 expressions=expressions, 2501 sorted_by=sorted_by, 2502 buckets=buckets, 2503 ) 2504 2505 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2506 if not self._match_text_seq("GRANTS"): 2507 self._retreat(self._index - 1) 2508 return None 2509 2510 return self.expression(exp.CopyGrantsProperty) 2511 2512 def _parse_freespace(self) -> exp.FreespaceProperty: 2513 self._match(TokenType.EQ) 2514 return self.expression( 2515 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2516 ) 2517 2518 def _parse_mergeblockratio( 2519 self, no: bool = False, default: bool = False 2520 ) -> exp.MergeBlockRatioProperty: 2521 if self._match(TokenType.EQ): 2522 return self.expression( 2523 exp.MergeBlockRatioProperty, 2524 this=self._parse_number(), 2525 percent=self._match(TokenType.PERCENT), 2526 ) 2527 2528 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2529 2530 def _parse_datablocksize( 2531 self, 2532 default: t.Optional[bool] = None, 2533 minimum: t.Optional[bool] = None, 2534 maximum: t.Optional[bool] = None, 2535 ) -> exp.DataBlocksizeProperty: 2536 self._match(TokenType.EQ) 2537 size = self._parse_number() 2538 2539 units = None 2540 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2541 units = self._prev.text 2542 2543 return self.expression( 2544 exp.DataBlocksizeProperty, 2545 size=size, 2546 units=units, 2547 default=default, 2548 minimum=minimum, 2549 maximum=maximum, 2550 ) 2551 2552 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2553 self._match(TokenType.EQ) 2554 always = self._match_text_seq("ALWAYS") 2555 manual = self._match_text_seq("MANUAL") 2556 never = self._match_text_seq("NEVER") 2557 default = self._match_text_seq("DEFAULT") 2558 2559 autotemp = None 2560 if self._match_text_seq("AUTOTEMP"): 2561 autotemp = self._parse_schema() 2562 2563 return self.expression( 2564 exp.BlockCompressionProperty, 2565 always=always, 2566 manual=manual, 2567 never=never, 2568 default=default, 2569 autotemp=autotemp, 2570 ) 2571 2572 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2573 index = self._index 2574 no = self._match_text_seq("NO") 2575 concurrent = self._match_text_seq("CONCURRENT") 2576 2577 if not self._match_text_seq("ISOLATED", "LOADING"): 2578 self._retreat(index) 2579 return None 2580 2581 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2582 return self.expression( 2583 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2584 ) 2585 2586 def _parse_locking(self) -> exp.LockingProperty: 2587 if self._match(TokenType.TABLE): 2588 kind = "TABLE" 2589 elif self._match(TokenType.VIEW): 2590 kind = "VIEW" 2591 elif self._match(TokenType.ROW): 2592 kind 
= "ROW" 2593 elif self._match_text_seq("DATABASE"): 2594 kind = "DATABASE" 2595 else: 2596 kind = None 2597 2598 if kind in ("DATABASE", "TABLE", "VIEW"): 2599 this = self._parse_table_parts() 2600 else: 2601 this = None 2602 2603 if self._match(TokenType.FOR): 2604 for_or_in = "FOR" 2605 elif self._match(TokenType.IN): 2606 for_or_in = "IN" 2607 else: 2608 for_or_in = None 2609 2610 if self._match_text_seq("ACCESS"): 2611 lock_type = "ACCESS" 2612 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2613 lock_type = "EXCLUSIVE" 2614 elif self._match_text_seq("SHARE"): 2615 lock_type = "SHARE" 2616 elif self._match_text_seq("READ"): 2617 lock_type = "READ" 2618 elif self._match_text_seq("WRITE"): 2619 lock_type = "WRITE" 2620 elif self._match_text_seq("CHECKSUM"): 2621 lock_type = "CHECKSUM" 2622 else: 2623 lock_type = None 2624 2625 override = self._match_text_seq("OVERRIDE") 2626 2627 return self.expression( 2628 exp.LockingProperty, 2629 this=this, 2630 kind=kind, 2631 for_or_in=for_or_in, 2632 lock_type=lock_type, 2633 override=override, 2634 ) 2635 2636 def _parse_partition_by(self) -> t.List[exp.Expression]: 2637 if self._match(TokenType.PARTITION_BY): 2638 return self._parse_csv(self._parse_assignment) 2639 return [] 2640 2641 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2642 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2643 if self._match_text_seq("MINVALUE"): 2644 return exp.var("MINVALUE") 2645 if self._match_text_seq("MAXVALUE"): 2646 return exp.var("MAXVALUE") 2647 return self._parse_bitwise() 2648 2649 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2650 expression = None 2651 from_expressions = None 2652 to_expressions = None 2653 2654 if self._match(TokenType.IN): 2655 this = self._parse_wrapped_csv(self._parse_bitwise) 2656 elif self._match(TokenType.FROM): 2657 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2658 self._match_text_seq("TO") 2659 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2660 elif self._match_text_seq("WITH", "(", "MODULUS"): 2661 this = self._parse_number() 2662 self._match_text_seq(",", "REMAINDER") 2663 expression = self._parse_number() 2664 self._match_r_paren() 2665 else: 2666 self.raise_error("Failed to parse partition bound spec.") 2667 2668 return self.expression( 2669 exp.PartitionBoundSpec, 2670 this=this, 2671 expression=expression, 2672 from_expressions=from_expressions, 2673 to_expressions=to_expressions, 2674 ) 2675 2676 # https://www.postgresql.org/docs/current/sql-createtable.html 2677 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2678 if not self._match_text_seq("OF"): 2679 self._retreat(self._index - 1) 2680 return None 2681 2682 this = self._parse_table(schema=True) 2683 2684 if self._match(TokenType.DEFAULT): 2685 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2686 elif self._match_text_seq("FOR", "VALUES"): 2687 expression = self._parse_partition_bound_spec() 2688 else: 2689 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2690 2691 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2692 2693 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2694 self._match(TokenType.EQ) 2695 return self.expression( 2696 exp.PartitionedByProperty, 2697 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2698 ) 2699 2700 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2701 if self._match_text_seq("AND", 
"STATISTICS"): 2702 statistics = True 2703 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2704 statistics = False 2705 else: 2706 statistics = None 2707 2708 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2709 2710 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2711 if self._match_text_seq("SQL"): 2712 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2713 return None 2714 2715 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2716 if self._match_text_seq("SQL", "DATA"): 2717 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2718 return None 2719 2720 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2721 if self._match_text_seq("PRIMARY", "INDEX"): 2722 return exp.NoPrimaryIndexProperty() 2723 if self._match_text_seq("SQL"): 2724 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2725 return None 2726 2727 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2728 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2729 return exp.OnCommitProperty() 2730 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2731 return exp.OnCommitProperty(delete=True) 2732 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2733 2734 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2735 if self._match_text_seq("SQL", "DATA"): 2736 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2737 return None 2738 2739 def _parse_distkey(self) -> exp.DistKeyProperty: 2740 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2741 2742 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2743 table = self._parse_table(schema=True) 2744 2745 options = [] 2746 while self._match_texts(("INCLUDING", "EXCLUDING")): 2747 this = self._prev.text.upper() 2748 2749 id_var = self._parse_id_var() 2750 if not id_var: 2751 return None 2752 2753 options.append( 2754 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2755 ) 2756 2757 return self.expression(exp.LikeProperty, this=table, expressions=options) 2758 2759 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2760 return self.expression( 2761 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2762 ) 2763 2764 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2765 self._match(TokenType.EQ) 2766 return self.expression( 2767 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2768 ) 2769 2770 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2771 self._match_text_seq("WITH", "CONNECTION") 2772 return self.expression( 2773 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2774 ) 2775 2776 def _parse_returns(self) -> exp.ReturnsProperty: 2777 value: t.Optional[exp.Expression] 2778 null = None 2779 is_table = self._match(TokenType.TABLE) 2780 2781 if is_table: 2782 if self._match(TokenType.LT): 2783 value = self.expression( 2784 exp.Schema, 2785 this="TABLE", 2786 expressions=self._parse_csv(self._parse_struct_types), 2787 ) 2788 if not self._match(TokenType.GT): 2789 self.raise_error("Expecting >") 2790 else: 2791 value = self._parse_schema(exp.var("TABLE")) 2792 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2793 null = True 2794 value = None 2795 else: 2796 value = self._parse_types() 2797 2798 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2799 2800 def _parse_describe(self) -> exp.Describe: 2801 kind = self._match_set(self.CREATABLES) and self._prev.text 2802 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2803 if self._match(TokenType.DOT): 2804 style = None 2805 self._retreat(self._index - 2) 2806 2807 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2808 2809 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2810 this = self._parse_statement() 2811 else: 2812 this = self._parse_table(schema=True) 2813 2814 properties = self._parse_properties() 2815 expressions = properties.expressions if properties else None 2816 partition = self._parse_partition() 2817 return self.expression( 2818 exp.Describe, 2819 this=this, 2820 style=style, 2821 kind=kind, 2822 expressions=expressions, 2823 partition=partition, 2824 format=format, 2825 ) 2826 2827 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2828 kind = self._prev.text.upper() 2829 expressions = [] 2830 2831 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2832 if self._match(TokenType.WHEN): 2833 expression = self._parse_disjunction() 2834 self._match(TokenType.THEN) 2835 else: 2836 expression = None 2837 2838 else_ = self._match(TokenType.ELSE) 2839 2840 if not self._match(TokenType.INTO): 2841 return None 2842 2843 return self.expression( 2844 exp.ConditionalInsert, 2845 this=self.expression( 2846 exp.Insert, 2847 this=self._parse_table(schema=True), 2848 expression=self._parse_derived_table_values(), 2849 ), 2850 expression=expression, 2851 else_=else_, 2852 ) 2853 2854 expression = parse_conditional_insert() 2855 while expression is not None: 2856 expressions.append(expression) 2857 expression = parse_conditional_insert() 2858 2859 return self.expression( 2860 exp.MultitableInserts, 2861 kind=kind, 2862 comments=comments, 2863 expressions=expressions, 2864 source=self._parse_table(), 2865 ) 2866 2867 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2868 comments = [] 2869 hint = self._parse_hint() 2870 overwrite = self._match(TokenType.OVERWRITE) 2871 ignore = self._match(TokenType.IGNORE) 2872 local = self._match_text_seq("LOCAL") 2873 alternative = None 2874 is_function = None 2875 2876 if self._match_text_seq("DIRECTORY"): 2877 this: t.Optional[exp.Expression] = self.expression( 2878 exp.Directory, 2879 this=self._parse_var_or_string(), 2880 local=local, 2881 row_format=self._parse_row_format(match_row=True), 2882 ) 2883 else: 2884 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2885 comments += ensure_list(self._prev_comments) 2886 return self._parse_multitable_inserts(comments) 2887 2888 if self._match(TokenType.OR): 2889 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2890 2891 self._match(TokenType.INTO) 2892 comments += ensure_list(self._prev_comments) 2893 self._match(TokenType.TABLE) 2894 is_function = self._match(TokenType.FUNCTION) 2895 2896 this = ( 2897 self._parse_table(schema=True, parse_partition=True) 2898 if not is_function 2899 else self._parse_function() 2900 ) 2901 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2902 this.set("alias", self._parse_table_alias()) 2903 2904 returning = self._parse_returning() 2905 2906 return self.expression( 2907 exp.Insert, 2908 comments=comments, 2909 hint=hint, 2910 is_function=is_function, 2911 this=this, 
2912 stored=self._match_text_seq("STORED") and self._parse_stored(), 2913 by_name=self._match_text_seq("BY", "NAME"), 2914 exists=self._parse_exists(), 2915 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2916 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2917 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2918 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2919 conflict=self._parse_on_conflict(), 2920 returning=returning or self._parse_returning(), 2921 overwrite=overwrite, 2922 alternative=alternative, 2923 ignore=ignore, 2924 source=self._match(TokenType.TABLE) and self._parse_table(), 2925 ) 2926 2927 def _parse_kill(self) -> exp.Kill: 2928 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2929 2930 return self.expression( 2931 exp.Kill, 2932 this=self._parse_primary(), 2933 kind=kind, 2934 ) 2935 2936 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2937 conflict = self._match_text_seq("ON", "CONFLICT") 2938 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2939 2940 if not conflict and not duplicate: 2941 return None 2942 2943 conflict_keys = None 2944 constraint = None 2945 2946 if conflict: 2947 if self._match_text_seq("ON", "CONSTRAINT"): 2948 constraint = self._parse_id_var() 2949 elif self._match(TokenType.L_PAREN): 2950 conflict_keys = self._parse_csv(self._parse_id_var) 2951 self._match_r_paren() 2952 2953 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2954 if self._prev.token_type == TokenType.UPDATE: 2955 self._match(TokenType.SET) 2956 expressions = self._parse_csv(self._parse_equality) 2957 else: 2958 expressions = None 2959 2960 return self.expression( 2961 exp.OnConflict, 2962 duplicate=duplicate, 2963 expressions=expressions, 2964 action=action, 2965 conflict_keys=conflict_keys, 2966 constraint=constraint, 2967 where=self._parse_where(), 2968 ) 2969 2970 def _parse_returning(self) -> t.Optional[exp.Returning]: 2971 if not self._match(TokenType.RETURNING): 2972 return None 2973 return self.expression( 2974 exp.Returning, 2975 expressions=self._parse_csv(self._parse_expression), 2976 into=self._match(TokenType.INTO) and self._parse_table_part(), 2977 ) 2978 2979 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2980 if not self._match(TokenType.FORMAT): 2981 return None 2982 return self._parse_row_format() 2983 2984 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2985 index = self._index 2986 with_ = with_ or self._match_text_seq("WITH") 2987 2988 if not self._match(TokenType.SERDE_PROPERTIES): 2989 self._retreat(index) 2990 return None 2991 return self.expression( 2992 exp.SerdeProperties, 2993 **{ # type: ignore 2994 "expressions": self._parse_wrapped_properties(), 2995 "with": with_, 2996 }, 2997 ) 2998 2999 def _parse_row_format( 3000 self, match_row: bool = False 3001 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3002 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3003 return None 3004 3005 if self._match_text_seq("SERDE"): 3006 this = self._parse_string() 3007 3008 serde_properties = self._parse_serde_properties() 3009 3010 return self.expression( 3011 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3012 ) 3013 3014 self._match_text_seq("DELIMITED") 3015 3016 kwargs = {} 3017 3018 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3019 kwargs["fields"] = self._parse_string() 3020 if self._match_text_seq("ESCAPED", "BY"): 3021 kwargs["escaped"] = self._parse_string() 3022 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3023 kwargs["collection_items"] = self._parse_string() 3024 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3025 kwargs["map_keys"] = self._parse_string() 3026 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3027 kwargs["lines"] = self._parse_string() 3028 if self._match_text_seq("NULL", "DEFINED", "AS"): 3029 kwargs["null"] = self._parse_string() 3030 3031 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3032 3033 def _parse_load(self) -> exp.LoadData | exp.Command: 3034 if self._match_text_seq("DATA"): 3035 local = self._match_text_seq("LOCAL") 3036 self._match_text_seq("INPATH") 3037 inpath = self._parse_string() 3038 overwrite = self._match(TokenType.OVERWRITE) 3039 self._match_pair(TokenType.INTO, TokenType.TABLE) 3040 3041 return self.expression( 3042 exp.LoadData, 3043 this=self._parse_table(schema=True), 3044 local=local, 3045 overwrite=overwrite, 3046 inpath=inpath, 3047 partition=self._parse_partition(), 3048 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3049 serde=self._match_text_seq("SERDE") and self._parse_string(), 3050 ) 3051 return self._parse_as_command(self._prev) 3052 3053 def _parse_delete(self) -> exp.Delete: 3054 # This handles MySQL's "Multiple-Table Syntax" 3055 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3056 tables = None 3057 if not self._match(TokenType.FROM, advance=False): 3058 tables = self._parse_csv(self._parse_table) or None 3059 3060 returning = self._parse_returning() 3061 3062 return self.expression( 3063 exp.Delete, 3064 tables=tables, 3065 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3066 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3067 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3068 where=self._parse_where(), 3069 returning=returning or self._parse_returning(), 3070 limit=self._parse_limit(), 3071 ) 3072 3073 def _parse_update(self) -> exp.Update: 3074 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3075 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3076 returning = self._parse_returning() 3077 return self.expression( 3078 exp.Update, 3079 **{ # type: ignore 3080 "this": this, 3081 "expressions": expressions, 3082 "from": self._parse_from(joins=True), 3083 "where": self._parse_where(), 3084 "returning": returning or self._parse_returning(), 3085 "order": self._parse_order(), 3086 "limit": self._parse_limit(), 3087 }, 3088 ) 3089 3090 def _parse_use(self) -> exp.Use: 3091 return self.expression( 3092 exp.Use, 3093 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3094 this=self._parse_table(schema=False), 3095 ) 3096 3097 def _parse_uncache(self) -> exp.Uncache: 3098 if not self._match(TokenType.TABLE): 3099 self.raise_error("Expecting TABLE after UNCACHE") 3100 3101 return self.expression( 3102 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3103 ) 3104 3105 def _parse_cache(self) -> exp.Cache: 3106 lazy = self._match_text_seq("LAZY") 3107 self._match(TokenType.TABLE) 3108 table = self._parse_table(schema=True) 3109 3110 options = [] 3111 if self._match_text_seq("OPTIONS"): 3112 self._match_l_paren() 3113 k = 
self._parse_string() 3114 self._match(TokenType.EQ) 3115 v = self._parse_string() 3116 options = [k, v] 3117 self._match_r_paren() 3118 3119 self._match(TokenType.ALIAS) 3120 return self.expression( 3121 exp.Cache, 3122 this=table, 3123 lazy=lazy, 3124 options=options, 3125 expression=self._parse_select(nested=True), 3126 ) 3127 3128 def _parse_partition(self) -> t.Optional[exp.Partition]: 3129 if not self._match_texts(self.PARTITION_KEYWORDS): 3130 return None 3131 3132 return self.expression( 3133 exp.Partition, 3134 subpartition=self._prev.text.upper() == "SUBPARTITION", 3135 expressions=self._parse_wrapped_csv(self._parse_assignment), 3136 ) 3137 3138 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3139 def _parse_value_expression() -> t.Optional[exp.Expression]: 3140 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3141 return exp.var(self._prev.text.upper()) 3142 return self._parse_expression() 3143 3144 if self._match(TokenType.L_PAREN): 3145 expressions = self._parse_csv(_parse_value_expression) 3146 self._match_r_paren() 3147 return self.expression(exp.Tuple, expressions=expressions) 3148 3149 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3150 expression = self._parse_expression() 3151 if expression: 3152 return self.expression(exp.Tuple, expressions=[expression]) 3153 return None 3154 3155 def _parse_projections(self) -> t.List[exp.Expression]: 3156 return self._parse_expressions() 3157 3158 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3159 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3160 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3161 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3162 ) 3163 elif self._match(TokenType.FROM): 3164 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3165 # Support parentheses for duckdb FROM-first syntax 3166 select = self._parse_select() 3167 if select: 3168 select.set("from", from_) 3169 this = select 3170 else: 3171 this = exp.select("*").from_(t.cast(exp.From, from_)) 3172 else: 3173 this = ( 3174 self._parse_table(consume_pipe=True) 3175 if table 3176 else self._parse_select(nested=True, parse_set_operation=False) 3177 ) 3178 3179 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3180 # in case a modifier (e.g. 
join) is following 3181 if table and isinstance(this, exp.Values) and this.alias: 3182 alias = this.args["alias"].pop() 3183 this = exp.Table(this=this, alias=alias) 3184 3185 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3186 3187 return this 3188 3189 def _parse_select( 3190 self, 3191 nested: bool = False, 3192 table: bool = False, 3193 parse_subquery_alias: bool = True, 3194 parse_set_operation: bool = True, 3195 consume_pipe: bool = True, 3196 ) -> t.Optional[exp.Expression]: 3197 query = self._parse_select_query( 3198 nested=nested, 3199 table=table, 3200 parse_subquery_alias=parse_subquery_alias, 3201 parse_set_operation=parse_set_operation, 3202 ) 3203 3204 if ( 3205 consume_pipe 3206 and self._match(TokenType.PIPE_GT, advance=False) 3207 and isinstance(query, exp.Query) 3208 ): 3209 query = self._parse_pipe_syntax_query(query) 3210 query = query.subquery(copy=False) if query and table else query 3211 3212 return query 3213 3214 def _parse_select_query( 3215 self, 3216 nested: bool = False, 3217 table: bool = False, 3218 parse_subquery_alias: bool = True, 3219 parse_set_operation: bool = True, 3220 ) -> t.Optional[exp.Expression]: 3221 cte = self._parse_with() 3222 3223 if cte: 3224 this = self._parse_statement() 3225 3226 if not this: 3227 self.raise_error("Failed to parse any statement following CTE") 3228 return cte 3229 3230 if "with" in this.arg_types: 3231 this.set("with", cte) 3232 else: 3233 self.raise_error(f"{this.key} does not support CTE") 3234 this = cte 3235 3236 return this 3237 3238 # duckdb supports leading with FROM x 3239 from_ = ( 3240 self._parse_from(consume_pipe=True) 3241 if self._match(TokenType.FROM, advance=False) 3242 else None 3243 ) 3244 3245 if self._match(TokenType.SELECT): 3246 comments = self._prev_comments 3247 3248 hint = self._parse_hint() 3249 3250 if self._next and not self._next.token_type == TokenType.DOT: 3251 all_ = self._match(TokenType.ALL) 3252 distinct = self._match_set(self.DISTINCT_TOKENS) 3253 else: 3254 all_, distinct = None, None 3255 3256 kind = ( 3257 self._match(TokenType.ALIAS) 3258 and self._match_texts(("STRUCT", "VALUE")) 3259 and self._prev.text.upper() 3260 ) 3261 3262 if distinct: 3263 distinct = self.expression( 3264 exp.Distinct, 3265 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3266 ) 3267 3268 if all_ and distinct: 3269 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3270 3271 operation_modifiers = [] 3272 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3273 operation_modifiers.append(exp.var(self._prev.text.upper())) 3274 3275 limit = self._parse_limit(top=True) 3276 projections = self._parse_projections() 3277 3278 this = self.expression( 3279 exp.Select, 3280 kind=kind, 3281 hint=hint, 3282 distinct=distinct, 3283 expressions=projections, 3284 limit=limit, 3285 operation_modifiers=operation_modifiers or None, 3286 ) 3287 this.comments = comments 3288 3289 into = self._parse_into() 3290 if into: 3291 this.set("into", into) 3292 3293 if not from_: 3294 from_ = self._parse_from() 3295 3296 if from_: 3297 this.set("from", from_) 3298 3299 this = self._parse_query_modifiers(this) 3300 elif (table or nested) and self._match(TokenType.L_PAREN): 3301 this = self._parse_wrapped_select(table=table) 3302 3303 # We return early here so that the UNION isn't attached to the subquery by the 3304 # following call to _parse_set_operations, but instead becomes the parent node 3305 self._match_r_paren() 3306 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3307 elif self._match(TokenType.VALUES, advance=False): 3308 this = self._parse_derived_table_values() 3309 elif from_: 3310 this = exp.select("*").from_(from_.this, copy=False) 3311 elif self._match(TokenType.SUMMARIZE): 3312 table = self._match(TokenType.TABLE) 3313 this = self._parse_select() or self._parse_string() or self._parse_table() 3314 return self.expression(exp.Summarize, this=this, table=table) 3315 elif self._match(TokenType.DESCRIBE): 3316 this = self._parse_describe() 3317 elif self._match_text_seq("STREAM"): 3318 this = self._parse_function() 3319 if this: 3320 this = self.expression(exp.Stream, this=this) 3321 else: 3322 self._retreat(self._index - 1) 3323 else: 3324 this = None 3325 3326 return self._parse_set_operations(this) if parse_set_operation else this 3327 3328 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3329 self._match_text_seq("SEARCH") 3330 3331 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3332 3333 if not kind: 3334 return None 3335 3336 self._match_text_seq("FIRST", "BY") 3337 3338 return self.expression( 3339 exp.RecursiveWithSearch, 3340 kind=kind, 3341 this=self._parse_id_var(), 3342 expression=self._match_text_seq("SET") and self._parse_id_var(), 3343 using=self._match_text_seq("USING") and self._parse_id_var(), 3344 ) 3345 3346 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3347 if not skip_with_token and not self._match(TokenType.WITH): 3348 return None 3349 3350 comments = self._prev_comments 3351 recursive = self._match(TokenType.RECURSIVE) 3352 3353 last_comments = None 3354 expressions = [] 3355 while True: 3356 cte = self._parse_cte() 3357 if isinstance(cte, exp.CTE): 3358 expressions.append(cte) 3359 if last_comments: 3360 cte.add_comments(last_comments) 3361 3362 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3363 break 3364 else: 3365 self._match(TokenType.WITH) 3366 3367 last_comments = self._prev_comments 3368 3369 return self.expression( 3370 exp.With, 3371 comments=comments, 3372 expressions=expressions, 3373 recursive=recursive, 3374 search=self._parse_recursive_with_search(), 3375 ) 3376 3377 def _parse_cte(self) -> t.Optional[exp.CTE]: 3378 index = self._index 3379 3380 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3381 if not alias or not alias.this: 3382 self.raise_error("Expected CTE to have alias") 3383 3384 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3385 self._retreat(index) 3386 return None 3387 3388 comments = self._prev_comments 3389 3390 if self._match_text_seq("NOT", "MATERIALIZED"): 3391 materialized = False 3392 elif self._match_text_seq("MATERIALIZED"): 3393 materialized = True 3394 else: 3395 materialized = None 3396 3397 cte = self.expression( 3398 exp.CTE, 3399 this=self._parse_wrapped(self._parse_statement), 3400 alias=alias, 3401 materialized=materialized, 3402 comments=comments, 3403 ) 3404 3405 values = cte.this 3406 if isinstance(values, exp.Values): 3407 if values.alias: 3408 cte.set("this", exp.select("*").from_(values)) 3409 else: 3410 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3411 3412 return cte 3413 3414 def _parse_table_alias( 3415 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3416 ) -> t.Optional[exp.TableAlias]: 3417 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3418 # so this section tries to parse the clause 
version and if it fails, it treats the token 3419 # as an identifier (alias) 3420 if self._can_parse_limit_or_offset(): 3421 return None 3422 3423 any_token = self._match(TokenType.ALIAS) 3424 alias = ( 3425 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3426 or self._parse_string_as_identifier() 3427 ) 3428 3429 index = self._index 3430 if self._match(TokenType.L_PAREN): 3431 columns = self._parse_csv(self._parse_function_parameter) 3432 self._match_r_paren() if columns else self._retreat(index) 3433 else: 3434 columns = None 3435 3436 if not alias and not columns: 3437 return None 3438 3439 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3440 3441 # We bubble up comments from the Identifier to the TableAlias 3442 if isinstance(alias, exp.Identifier): 3443 table_alias.add_comments(alias.pop_comments()) 3444 3445 return table_alias 3446 3447 def _parse_subquery( 3448 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3449 ) -> t.Optional[exp.Subquery]: 3450 if not this: 3451 return None 3452 3453 return self.expression( 3454 exp.Subquery, 3455 this=this, 3456 pivots=self._parse_pivots(), 3457 alias=self._parse_table_alias() if parse_alias else None, 3458 sample=self._parse_table_sample(), 3459 ) 3460 3461 def _implicit_unnests_to_explicit(self, this: E) -> E: 3462 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3463 3464 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3465 for i, join in enumerate(this.args.get("joins") or []): 3466 table = join.this 3467 normalized_table = table.copy() 3468 normalized_table.meta["maybe_column"] = True 3469 normalized_table = _norm(normalized_table, dialect=self.dialect) 3470 3471 if isinstance(table, exp.Table) and not join.args.get("on"): 3472 if normalized_table.parts[0].name in refs: 3473 table_as_column = table.to_column() 3474 unnest = exp.Unnest(expressions=[table_as_column]) 3475 3476 # Table.to_column creates a parent Alias node that we want to convert to 3477 # a TableAlias and attach to the Unnest, so it matches the parser's output 3478 if isinstance(table.args.get("alias"), exp.TableAlias): 3479 table_as_column.replace(table_as_column.this) 3480 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3481 3482 table.replace(unnest) 3483 3484 refs.add(normalized_table.alias_or_name) 3485 3486 return this 3487 3488 def _parse_query_modifiers( 3489 self, this: t.Optional[exp.Expression] 3490 ) -> t.Optional[exp.Expression]: 3491 if isinstance(this, self.MODIFIABLES): 3492 for join in self._parse_joins(): 3493 this.append("joins", join) 3494 for lateral in iter(self._parse_lateral, None): 3495 this.append("laterals", lateral) 3496 3497 while True: 3498 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3499 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3500 key, expression = parser(self) 3501 3502 if expression: 3503 this.set(key, expression) 3504 if key == "limit": 3505 offset = expression.args.pop("offset", None) 3506 3507 if offset: 3508 offset = exp.Offset(expression=offset) 3509 this.set("offset", offset) 3510 3511 limit_by_expressions = expression.expressions 3512 expression.set("expressions", None) 3513 offset.set("expressions", limit_by_expressions) 3514 continue 3515 break 3516 3517 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3518 this = self._implicit_unnests_to_explicit(this) 3519 3520 return this 3521 3522 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3523 start = self._curr 3524 while self._curr: 3525 self._advance() 3526 3527 end = self._tokens[self._index - 1] 3528 return exp.Hint(expressions=[self._find_sql(start, end)]) 3529 3530 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3531 return self._parse_function_call() 3532 3533 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3534 start_index = self._index 3535 should_fallback_to_string = False 3536 3537 hints = [] 3538 try: 3539 for hint in iter( 3540 lambda: self._parse_csv( 3541 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3542 ), 3543 [], 3544 ): 3545 hints.extend(hint) 3546 except ParseError: 3547 should_fallback_to_string = True 3548 3549 if should_fallback_to_string or self._curr: 3550 self._retreat(start_index) 3551 return self._parse_hint_fallback_to_string() 3552 3553 return self.expression(exp.Hint, expressions=hints) 3554 3555 def _parse_hint(self) -> t.Optional[exp.Hint]: 3556 if self._match(TokenType.HINT) and self._prev_comments: 3557 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3558 3559 return None 3560 3561 def _parse_into(self) -> t.Optional[exp.Into]: 3562 if not self._match(TokenType.INTO): 3563 return None 3564 3565 temp = self._match(TokenType.TEMPORARY) 3566 unlogged = self._match_text_seq("UNLOGGED") 3567 self._match(TokenType.TABLE) 3568 3569 return self.expression( 3570 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3571 ) 3572 3573 def _parse_from( 3574 self, 3575 joins: bool = False, 3576 skip_from_token: bool = False, 3577 consume_pipe: bool = False, 3578 ) -> t.Optional[exp.From]: 3579 if not skip_from_token and not self._match(TokenType.FROM): 3580 return None 3581 3582 return self.expression( 3583 exp.From, 3584 comments=self._prev_comments, 3585 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3586 ) 3587 3588 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3589 return self.expression( 3590 exp.MatchRecognizeMeasure, 3591 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3592 this=self._parse_expression(), 3593 ) 3594 3595 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3596 if not self._match(TokenType.MATCH_RECOGNIZE): 3597 return None 3598 3599 self._match_l_paren() 3600 3601 partition = self._parse_partition_by() 3602 order = self._parse_order() 3603 3604 measures = ( 3605 self._parse_csv(self._parse_match_recognize_measure) 3606 if self._match_text_seq("MEASURES") 3607 else None 3608 ) 3609 3610 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3611 rows = exp.var("ONE ROW PER MATCH") 3612 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3613 text = "ALL ROWS PER MATCH" 3614 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3615 text += " SHOW EMPTY MATCHES" 3616 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3617 text += " OMIT EMPTY MATCHES" 3618 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3619 text += " WITH UNMATCHED ROWS" 3620 rows = exp.var(text) 3621 else: 3622 rows = None 3623 3624 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3625 text = "AFTER MATCH SKIP" 3626 if self._match_text_seq("PAST", "LAST", "ROW"): 3627 text += " PAST LAST ROW" 3628 elif self._match_text_seq("TO", "NEXT", "ROW"): 3629 text += " TO NEXT ROW" 3630 elif self._match_text_seq("TO", "FIRST"): 3631 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3632 elif self._match_text_seq("TO", "LAST"): 3633 text += f" TO LAST {self._advance_any().text}" # type: ignore 3634 after = exp.var(text) 3635 else: 3636 after = None 3637 3638 if self._match_text_seq("PATTERN"): 3639 self._match_l_paren() 3640 3641 if not self._curr: 3642 self.raise_error("Expecting )", self._curr) 3643 3644 paren = 1 3645 start = self._curr 3646 3647 while self._curr and paren > 0: 3648 if self._curr.token_type == TokenType.L_PAREN: 3649 paren += 1 3650 if self._curr.token_type == TokenType.R_PAREN: 3651 paren -= 1 3652 3653 end = self._prev 3654 self._advance() 3655 3656 if paren > 0: 3657 self.raise_error("Expecting )", self._curr) 3658 3659 pattern = exp.var(self._find_sql(start, end)) 3660 else: 3661 pattern = None 3662 3663 define = ( 3664 self._parse_csv(self._parse_name_as_expression) 3665 if self._match_text_seq("DEFINE") 3666 else None 3667 ) 3668 3669 self._match_r_paren() 3670 3671 return self.expression( 3672 exp.MatchRecognize, 3673 partition_by=partition, 3674 order=order, 3675 measures=measures, 3676 rows=rows, 3677 after=after, 3678 pattern=pattern, 3679 define=define, 3680 alias=self._parse_table_alias(), 3681 ) 3682 3683 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3684 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3685 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3686 cross_apply = False 3687 3688 if cross_apply is not None: 3689 this = self._parse_select(table=True) 3690 view = None 3691 outer = None 3692 elif self._match(TokenType.LATERAL): 3693 this = self._parse_select(table=True) 3694 view = self._match(TokenType.VIEW) 3695 outer = self._match(TokenType.OUTER) 3696 else: 3697 return None 3698 3699 if not this: 3700 this = ( 3701 self._parse_unnest() 3702 or self._parse_function() 3703 or self._parse_id_var(any_token=False) 3704 ) 3705 3706 while self._match(TokenType.DOT): 3707 this = exp.Dot( 3708 this=this, 3709 expression=self._parse_function() or self._parse_id_var(any_token=False), 3710 ) 3711 3712 ordinality: t.Optional[bool] = None 3713 3714 if view: 3715 table = self._parse_id_var(any_token=False) 3716 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3717 table_alias: t.Optional[exp.TableAlias] = self.expression( 3718 exp.TableAlias, this=table, columns=columns 3719 ) 3720 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3721 # We move the alias from the lateral's child node to the lateral itself 3722 table_alias = this.args["alias"].pop() 3723 else: 3724 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3725 table_alias = self._parse_table_alias() 3726 3727 return self.expression( 3728 exp.Lateral, 3729 this=this, 3730 view=view, 3731 outer=outer, 3732 alias=table_alias, 3733 cross_apply=cross_apply, 3734 ordinality=ordinality, 3735 ) 3736 3737 def _parse_join_parts( 3738 self, 3739 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3740 return ( 3741 self._match_set(self.JOIN_METHODS) and self._prev, 3742 self._match_set(self.JOIN_SIDES) and self._prev, 3743 self._match_set(self.JOIN_KINDS) and self._prev, 3744 ) 3745 3746 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3747 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3748 this = self._parse_column() 3749 if isinstance(this, exp.Column): 3750 return this.this 3751 return this 3752 3753 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3754 3755 def _parse_join( 3756 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3757 ) -> t.Optional[exp.Join]: 3758 if self._match(TokenType.COMMA): 3759 table = self._try_parse(self._parse_table) 3760 cross_join = self.expression(exp.Join, this=table) if table else None 3761 3762 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3763 cross_join.set("kind", "CROSS") 3764 3765 return cross_join 3766 3767 index = self._index 3768 method, side, kind = self._parse_join_parts() 3769 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3770 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3771 join_comments = self._prev_comments 3772 3773 if not skip_join_token and not join: 3774 self._retreat(index) 3775 kind = None 3776 method = None 3777 side = None 3778 3779 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3780 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3781 3782 if not skip_join_token and not join and not outer_apply and not cross_apply: 3783 return None 3784 3785 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3786 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3787 kwargs["expressions"] = self._parse_csv( 3788 lambda: self._parse_table(parse_bracket=parse_bracket) 3789 ) 3790 3791 if method: 3792 kwargs["method"] = method.text 3793 if side: 3794 kwargs["side"] = side.text 3795 if kind: 3796 kwargs["kind"] = kind.text 3797 if hint: 3798 kwargs["hint"] = hint 3799 3800 if self._match(TokenType.MATCH_CONDITION): 3801 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3802 3803 if self._match(TokenType.ON): 3804 kwargs["on"] = self._parse_assignment() 3805 elif self._match(TokenType.USING): 3806 kwargs["using"] = self._parse_using_identifiers() 3807 elif ( 3808 not (outer_apply or cross_apply) 3809 and not isinstance(kwargs["this"], exp.Unnest) 3810 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3811 ): 3812 index = self._index 3813 joins: t.Optional[list] = list(self._parse_joins()) 3814 3815 if joins and self._match(TokenType.ON): 3816 kwargs["on"] = self._parse_assignment() 3817 elif joins and self._match(TokenType.USING): 3818 kwargs["using"] = self._parse_using_identifiers() 3819 else: 3820 joins = None 3821 self._retreat(index) 3822 3823 kwargs["this"].set("joins", joins if joins else None) 3824 3825 kwargs["pivots"] = self._parse_pivots() 3826 3827 comments = [c for token in (method, side, kind) if token for c in token.comments] 3828 comments = (join_comments or []) + comments 3829 return self.expression(exp.Join, comments=comments, **kwargs) 3830 3831 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3832 this = self._parse_assignment() 3833 3834 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3835 return this 3836 3837 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3838 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3839 3840 return this 3841 3842 def _parse_index_params(self) -> exp.IndexParameters: 3843 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3844 3845 if self._match(TokenType.L_PAREN, advance=False): 3846 columns = self._parse_wrapped_csv(self._parse_with_operator) 3847 else: 3848 columns = None 3849 3850 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3851 partition_by = self._parse_partition_by() 3852 
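# Illustrative sketch (assumed Postgres-style DDL, given only as an example of the clauses this
# method collects): an index definition such as
#
#   CREATE INDEX idx ON t USING btree (a, b) INCLUDE (c) WITH (fillfactor = 70) WHERE a > 0
#
# would populate the `using`, `columns`, `include`, `with_storage` and `where` arguments of the
# exp.IndexParameters built here, e.g. via
# sqlglot.parse_one("CREATE INDEX idx ON t (a) WHERE a > 0", read="postgres").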
with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3853 tablespace = ( 3854 self._parse_var(any_token=True) 3855 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3856 else None 3857 ) 3858 where = self._parse_where() 3859 3860 on = self._parse_field() if self._match(TokenType.ON) else None 3861 3862 return self.expression( 3863 exp.IndexParameters, 3864 using=using, 3865 columns=columns, 3866 include=include, 3867 partition_by=partition_by, 3868 where=where, 3869 with_storage=with_storage, 3870 tablespace=tablespace, 3871 on=on, 3872 ) 3873 3874 def _parse_index( 3875 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3876 ) -> t.Optional[exp.Index]: 3877 if index or anonymous: 3878 unique = None 3879 primary = None 3880 amp = None 3881 3882 self._match(TokenType.ON) 3883 self._match(TokenType.TABLE) # hive 3884 table = self._parse_table_parts(schema=True) 3885 else: 3886 unique = self._match(TokenType.UNIQUE) 3887 primary = self._match_text_seq("PRIMARY") 3888 amp = self._match_text_seq("AMP") 3889 3890 if not self._match(TokenType.INDEX): 3891 return None 3892 3893 index = self._parse_id_var() 3894 table = None 3895 3896 params = self._parse_index_params() 3897 3898 return self.expression( 3899 exp.Index, 3900 this=index, 3901 table=table, 3902 unique=unique, 3903 primary=primary, 3904 amp=amp, 3905 params=params, 3906 ) 3907 3908 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3909 hints: t.List[exp.Expression] = [] 3910 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3911 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3912 hints.append( 3913 self.expression( 3914 exp.WithTableHint, 3915 expressions=self._parse_csv( 3916 lambda: self._parse_function() or self._parse_var(any_token=True) 3917 ), 3918 ) 3919 ) 3920 self._match_r_paren() 3921 else: 3922 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3923 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3924 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3925 3926 self._match_set((TokenType.INDEX, TokenType.KEY)) 3927 if self._match(TokenType.FOR): 3928 hint.set("target", self._advance_any() and self._prev.text.upper()) 3929 3930 hint.set("expressions", self._parse_wrapped_id_vars()) 3931 hints.append(hint) 3932 3933 return hints or None 3934 3935 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3936 return ( 3937 (not schema and self._parse_function(optional_parens=False)) 3938 or self._parse_id_var(any_token=False) 3939 or self._parse_string_as_identifier() 3940 or self._parse_placeholder() 3941 ) 3942 3943 def _parse_table_parts( 3944 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3945 ) -> exp.Table: 3946 catalog = None 3947 db = None 3948 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3949 3950 while self._match(TokenType.DOT): 3951 if catalog: 3952 # This allows nesting the table in arbitrarily many dot expressions if needed 3953 table = self.expression( 3954 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3955 ) 3956 else: 3957 catalog = db 3958 db = table 3959 # "" used for tsql FROM a..b case 3960 table = self._parse_table_part(schema=schema) or "" 3961 3962 if ( 3963 wildcard 3964 and self._is_connected() 3965 and (isinstance(table, exp.Identifier) or not table) 3966 and self._match(TokenType.STAR) 3967 ): 3968 if isinstance(table, exp.Identifier): 
3969 table.args["this"] += "*" 3970 else: 3971 table = exp.Identifier(this="*") 3972 3973 # We bubble up comments from the Identifier to the Table 3974 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3975 3976 if is_db_reference: 3977 catalog = db 3978 db = table 3979 table = None 3980 3981 if not table and not is_db_reference: 3982 self.raise_error(f"Expected table name but got {self._curr}") 3983 if not db and is_db_reference: 3984 self.raise_error(f"Expected database name but got {self._curr}") 3985 3986 table = self.expression( 3987 exp.Table, 3988 comments=comments, 3989 this=table, 3990 db=db, 3991 catalog=catalog, 3992 ) 3993 3994 changes = self._parse_changes() 3995 if changes: 3996 table.set("changes", changes) 3997 3998 at_before = self._parse_historical_data() 3999 if at_before: 4000 table.set("when", at_before) 4001 4002 pivots = self._parse_pivots() 4003 if pivots: 4004 table.set("pivots", pivots) 4005 4006 return table 4007 4008 def _parse_table( 4009 self, 4010 schema: bool = False, 4011 joins: bool = False, 4012 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4013 parse_bracket: bool = False, 4014 is_db_reference: bool = False, 4015 parse_partition: bool = False, 4016 consume_pipe: bool = False, 4017 ) -> t.Optional[exp.Expression]: 4018 lateral = self._parse_lateral() 4019 if lateral: 4020 return lateral 4021 4022 unnest = self._parse_unnest() 4023 if unnest: 4024 return unnest 4025 4026 values = self._parse_derived_table_values() 4027 if values: 4028 return values 4029 4030 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4031 if subquery: 4032 if not subquery.args.get("pivots"): 4033 subquery.set("pivots", self._parse_pivots()) 4034 return subquery 4035 4036 bracket = parse_bracket and self._parse_bracket(None) 4037 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4038 4039 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4040 self._parse_table 4041 ) 4042 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4043 4044 only = self._match(TokenType.ONLY) 4045 4046 this = t.cast( 4047 exp.Expression, 4048 bracket 4049 or rows_from 4050 or self._parse_bracket( 4051 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4052 ), 4053 ) 4054 4055 if only: 4056 this.set("only", only) 4057 4058 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4059 self._match_text_seq("*") 4060 4061 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4062 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4063 this.set("partition", self._parse_partition()) 4064 4065 if schema: 4066 return self._parse_schema(this=this) 4067 4068 version = self._parse_version() 4069 4070 if version: 4071 this.set("version", version) 4072 4073 if self.dialect.ALIAS_POST_TABLESAMPLE: 4074 this.set("sample", self._parse_table_sample()) 4075 4076 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4077 if alias: 4078 this.set("alias", alias) 4079 4080 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4081 return self.expression( 4082 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4083 ) 4084 4085 this.set("hints", self._parse_table_hints()) 4086 4087 if not this.args.get("pivots"): 4088 this.set("pivots", self._parse_pivots()) 4089 4090 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4091 this.set("sample", 
self._parse_table_sample()) 4092 4093 if joins: 4094 for join in self._parse_joins(): 4095 this.append("joins", join) 4096 4097 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4098 this.set("ordinality", True) 4099 this.set("alias", self._parse_table_alias()) 4100 4101 return this 4102 4103 def _parse_version(self) -> t.Optional[exp.Version]: 4104 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4105 this = "TIMESTAMP" 4106 elif self._match(TokenType.VERSION_SNAPSHOT): 4107 this = "VERSION" 4108 else: 4109 return None 4110 4111 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4112 kind = self._prev.text.upper() 4113 start = self._parse_bitwise() 4114 self._match_texts(("TO", "AND")) 4115 end = self._parse_bitwise() 4116 expression: t.Optional[exp.Expression] = self.expression( 4117 exp.Tuple, expressions=[start, end] 4118 ) 4119 elif self._match_text_seq("CONTAINED", "IN"): 4120 kind = "CONTAINED IN" 4121 expression = self.expression( 4122 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4123 ) 4124 elif self._match(TokenType.ALL): 4125 kind = "ALL" 4126 expression = None 4127 else: 4128 self._match_text_seq("AS", "OF") 4129 kind = "AS OF" 4130 expression = self._parse_type() 4131 4132 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4133 4134 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4135 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4136 index = self._index 4137 historical_data = None 4138 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4139 this = self._prev.text.upper() 4140 kind = ( 4141 self._match(TokenType.L_PAREN) 4142 and self._match_texts(self.HISTORICAL_DATA_KIND) 4143 and self._prev.text.upper() 4144 ) 4145 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4146 4147 if expression: 4148 self._match_r_paren() 4149 historical_data = self.expression( 4150 exp.HistoricalData, this=this, kind=kind, expression=expression 4151 ) 4152 else: 4153 self._retreat(index) 4154 4155 return historical_data 4156 4157 def _parse_changes(self) -> t.Optional[exp.Changes]: 4158 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4159 return None 4160 4161 information = self._parse_var(any_token=True) 4162 self._match_r_paren() 4163 4164 return self.expression( 4165 exp.Changes, 4166 information=information, 4167 at_before=self._parse_historical_data(), 4168 end=self._parse_historical_data(), 4169 ) 4170 4171 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4172 if not self._match(TokenType.UNNEST): 4173 return None 4174 4175 expressions = self._parse_wrapped_csv(self._parse_equality) 4176 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4177 4178 alias = self._parse_table_alias() if with_alias else None 4179 4180 if alias: 4181 if self.dialect.UNNEST_COLUMN_ONLY: 4182 if alias.args.get("columns"): 4183 self.raise_error("Unexpected extra column alias in unnest.") 4184 4185 alias.set("columns", [alias.this]) 4186 alias.set("this", None) 4187 4188 columns = alias.args.get("columns") or [] 4189 if offset and len(expressions) < len(columns): 4190 offset = columns.pop() 4191 4192 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4193 self._match(TokenType.ALIAS) 4194 offset = self._parse_id_var( 4195 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4196 ) or exp.to_identifier("offset") 4197 4198 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4199 
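# Usage sketch (assumed BigQuery-flavoured input): the UNNEST handling above accepts queries like
#
#   SELECT x, pos FROM UNNEST([1, 2, 3]) AS x WITH OFFSET AS pos
#
# e.g. sqlglot.parse_one("SELECT x, pos FROM UNNEST([1, 2, 3]) AS x WITH OFFSET AS pos", read="bigquery")
# produces an exp.Unnest whose `alias` carries the column alias and whose `offset` identifier comes
# from the WITH OFFSET branch above (the exact AST shape depends on dialect settings such as
# UNNEST_COLUMN_ONLY).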
4200 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4201 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4202 if not is_derived and not ( 4203 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4204 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4205 ): 4206 return None 4207 4208 expressions = self._parse_csv(self._parse_value) 4209 alias = self._parse_table_alias() 4210 4211 if is_derived: 4212 self._match_r_paren() 4213 4214 return self.expression( 4215 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4216 ) 4217 4218 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4219 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4220 as_modifier and self._match_text_seq("USING", "SAMPLE") 4221 ): 4222 return None 4223 4224 bucket_numerator = None 4225 bucket_denominator = None 4226 bucket_field = None 4227 percent = None 4228 size = None 4229 seed = None 4230 4231 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4232 matched_l_paren = self._match(TokenType.L_PAREN) 4233 4234 if self.TABLESAMPLE_CSV: 4235 num = None 4236 expressions = self._parse_csv(self._parse_primary) 4237 else: 4238 expressions = None 4239 num = ( 4240 self._parse_factor() 4241 if self._match(TokenType.NUMBER, advance=False) 4242 else self._parse_primary() or self._parse_placeholder() 4243 ) 4244 4245 if self._match_text_seq("BUCKET"): 4246 bucket_numerator = self._parse_number() 4247 self._match_text_seq("OUT", "OF") 4248 bucket_denominator = bucket_denominator = self._parse_number() 4249 self._match(TokenType.ON) 4250 bucket_field = self._parse_field() 4251 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4252 percent = num 4253 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4254 size = num 4255 else: 4256 percent = num 4257 4258 if matched_l_paren: 4259 self._match_r_paren() 4260 4261 if self._match(TokenType.L_PAREN): 4262 method = self._parse_var(upper=True) 4263 seed = self._match(TokenType.COMMA) and self._parse_number() 4264 self._match_r_paren() 4265 elif self._match_texts(("SEED", "REPEATABLE")): 4266 seed = self._parse_wrapped(self._parse_number) 4267 4268 if not method and self.DEFAULT_SAMPLING_METHOD: 4269 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4270 4271 return self.expression( 4272 exp.TableSample, 4273 expressions=expressions, 4274 method=method, 4275 bucket_numerator=bucket_numerator, 4276 bucket_denominator=bucket_denominator, 4277 bucket_field=bucket_field, 4278 percent=percent, 4279 size=size, 4280 seed=seed, 4281 ) 4282 4283 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4284 return list(iter(self._parse_pivot, None)) or None 4285 4286 def _parse_joins(self) -> t.Iterator[exp.Join]: 4287 return iter(self._parse_join, None) 4288 4289 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4290 if not self._match(TokenType.INTO): 4291 return None 4292 4293 return self.expression( 4294 exp.UnpivotColumns, 4295 this=self._match_text_seq("NAME") and self._parse_column(), 4296 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4297 ) 4298 4299 # https://duckdb.org/docs/sql/statements/pivot 4300 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4301 def _parse_on() -> t.Optional[exp.Expression]: 4302 this = self._parse_bitwise() 4303 4304 if self._match(TokenType.IN): 4305 # PIVOT ... 
ON col IN (row_val1, row_val2) 4306 return self._parse_in(this) 4307 if self._match(TokenType.ALIAS, advance=False): 4308 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4309 return self._parse_alias(this) 4310 4311 return this 4312 4313 this = self._parse_table() 4314 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4315 into = self._parse_unpivot_columns() 4316 using = self._match(TokenType.USING) and self._parse_csv( 4317 lambda: self._parse_alias(self._parse_function()) 4318 ) 4319 group = self._parse_group() 4320 4321 return self.expression( 4322 exp.Pivot, 4323 this=this, 4324 expressions=expressions, 4325 using=using, 4326 group=group, 4327 unpivot=is_unpivot, 4328 into=into, 4329 ) 4330 4331 def _parse_pivot_in(self) -> exp.In: 4332 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4333 this = self._parse_select_or_expression() 4334 4335 self._match(TokenType.ALIAS) 4336 alias = self._parse_bitwise() 4337 if alias: 4338 if isinstance(alias, exp.Column) and not alias.db: 4339 alias = alias.this 4340 return self.expression(exp.PivotAlias, this=this, alias=alias) 4341 4342 return this 4343 4344 value = self._parse_column() 4345 4346 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4347 self.raise_error("Expecting IN (") 4348 4349 if self._match(TokenType.ANY): 4350 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4351 else: 4352 exprs = self._parse_csv(_parse_aliased_expression) 4353 4354 self._match_r_paren() 4355 return self.expression(exp.In, this=value, expressions=exprs) 4356 4357 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4358 func = self._parse_function() 4359 if not func: 4360 self.raise_error("Expecting an aggregation function in PIVOT") 4361 4362 return self._parse_alias(func) 4363 4364 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4365 index = self._index 4366 include_nulls = None 4367 4368 if self._match(TokenType.PIVOT): 4369 unpivot = False 4370 elif self._match(TokenType.UNPIVOT): 4371 unpivot = True 4372 4373 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4374 if self._match_text_seq("INCLUDE", "NULLS"): 4375 include_nulls = True 4376 elif self._match_text_seq("EXCLUDE", "NULLS"): 4377 include_nulls = False 4378 else: 4379 return None 4380 4381 expressions = [] 4382 4383 if not self._match(TokenType.L_PAREN): 4384 self._retreat(index) 4385 return None 4386 4387 if unpivot: 4388 expressions = self._parse_csv(self._parse_column) 4389 else: 4390 expressions = self._parse_csv(self._parse_pivot_aggregation) 4391 4392 if not expressions: 4393 self.raise_error("Failed to parse PIVOT's aggregation list") 4394 4395 if not self._match(TokenType.FOR): 4396 self.raise_error("Expecting FOR") 4397 4398 fields = [] 4399 while True: 4400 field = self._try_parse(self._parse_pivot_in) 4401 if not field: 4402 break 4403 fields.append(field) 4404 4405 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4406 self._parse_bitwise 4407 ) 4408 4409 group = self._parse_group() 4410 4411 self._match_r_paren() 4412 4413 pivot = self.expression( 4414 exp.Pivot, 4415 expressions=expressions, 4416 fields=fields, 4417 unpivot=unpivot, 4418 include_nulls=include_nulls, 4419 default_on_null=default_on_null, 4420 group=group, 4421 ) 4422 4423 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4424 pivot.set("alias", self._parse_table_alias()) 4425 4426 if not unpivot: 4427 names = 
self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4428 4429 columns: t.List[exp.Expression] = [] 4430 all_fields = [] 4431 for pivot_field in pivot.fields: 4432 pivot_field_expressions = pivot_field.expressions 4433 4434 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4435 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4436 continue 4437 4438 all_fields.append( 4439 [ 4440 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4441 for fld in pivot_field_expressions 4442 ] 4443 ) 4444 4445 if all_fields: 4446 if names: 4447 all_fields.append(names) 4448 4449 # Generate all possible combinations of the pivot columns 4450 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4451 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4452 for fld_parts_tuple in itertools.product(*all_fields): 4453 fld_parts = list(fld_parts_tuple) 4454 4455 if names and self.PREFIXED_PIVOT_COLUMNS: 4456 # Move the "name" to the front of the list 4457 fld_parts.insert(0, fld_parts.pop(-1)) 4458 4459 columns.append(exp.to_identifier("_".join(fld_parts))) 4460 4461 pivot.set("columns", columns) 4462 4463 return pivot 4464 4465 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4466 return [agg.alias for agg in aggregations if agg.alias] 4467 4468 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4469 if not skip_where_token and not self._match(TokenType.PREWHERE): 4470 return None 4471 4472 return self.expression( 4473 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4474 ) 4475 4476 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4477 if not skip_where_token and not self._match(TokenType.WHERE): 4478 return None 4479 4480 return self.expression( 4481 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4482 ) 4483 4484 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4485 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4486 return None 4487 comments = self._prev_comments 4488 4489 elements: t.Dict[str, t.Any] = defaultdict(list) 4490 4491 if self._match(TokenType.ALL): 4492 elements["all"] = True 4493 elif self._match(TokenType.DISTINCT): 4494 elements["all"] = False 4495 4496 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4497 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4498 4499 while True: 4500 index = self._index 4501 4502 elements["expressions"].extend( 4503 self._parse_csv( 4504 lambda: None 4505 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4506 else self._parse_assignment() 4507 ) 4508 ) 4509 4510 before_with_index = self._index 4511 with_prefix = self._match(TokenType.WITH) 4512 4513 if self._match(TokenType.ROLLUP): 4514 elements["rollup"].append( 4515 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4516 ) 4517 elif self._match(TokenType.CUBE): 4518 elements["cube"].append( 4519 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4520 ) 4521 elif self._match(TokenType.GROUPING_SETS): 4522 elements["grouping_sets"].append( 4523 self.expression( 4524 exp.GroupingSets, 4525 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4526 ) 4527 ) 4528 elif self._match_text_seq("TOTALS"): 4529 elements["totals"] = True # type: ignore 4530 4531 if 
before_with_index <= self._index <= before_with_index + 1: 4532 self._retreat(before_with_index) 4533 break 4534 4535 if index == self._index: 4536 break 4537 4538 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4539 4540 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4541 return self.expression( 4542 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4543 ) 4544 4545 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4546 if self._match(TokenType.L_PAREN): 4547 grouping_set = self._parse_csv(self._parse_column) 4548 self._match_r_paren() 4549 return self.expression(exp.Tuple, expressions=grouping_set) 4550 4551 return self._parse_column() 4552 4553 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4554 if not skip_having_token and not self._match(TokenType.HAVING): 4555 return None 4556 return self.expression( 4557 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4558 ) 4559 4560 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4561 if not self._match(TokenType.QUALIFY): 4562 return None 4563 return self.expression(exp.Qualify, this=self._parse_assignment()) 4564 4565 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4566 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4567 exp.Prior, this=self._parse_bitwise() 4568 ) 4569 connect = self._parse_assignment() 4570 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4571 return connect 4572 4573 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4574 if skip_start_token: 4575 start = None 4576 elif self._match(TokenType.START_WITH): 4577 start = self._parse_assignment() 4578 else: 4579 return None 4580 4581 self._match(TokenType.CONNECT_BY) 4582 nocycle = self._match_text_seq("NOCYCLE") 4583 connect = self._parse_connect_with_prior() 4584 4585 if not start and self._match(TokenType.START_WITH): 4586 start = self._parse_assignment() 4587 4588 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4589 4590 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4591 this = self._parse_id_var(any_token=True) 4592 if self._match(TokenType.ALIAS): 4593 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4594 return this 4595 4596 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4597 if self._match_text_seq("INTERPOLATE"): 4598 return self._parse_wrapped_csv(self._parse_name_as_expression) 4599 return None 4600 4601 def _parse_order( 4602 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4603 ) -> t.Optional[exp.Expression]: 4604 siblings = None 4605 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4606 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4607 return this 4608 4609 siblings = True 4610 4611 return self.expression( 4612 exp.Order, 4613 comments=self._prev_comments, 4614 this=this, 4615 expressions=self._parse_csv(self._parse_ordered), 4616 siblings=siblings, 4617 ) 4618 4619 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4620 if not self._match(token): 4621 return None 4622 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4623 4624 def _parse_ordered( 4625 self, parse_method: t.Optional[t.Callable] = None 4626 ) -> t.Optional[exp.Ordered]: 4627 this = parse_method() if parse_method else 
self._parse_assignment() 4628 if not this: 4629 return None 4630 4631 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4632 this = exp.var("ALL") 4633 4634 asc = self._match(TokenType.ASC) 4635 desc = self._match(TokenType.DESC) or (asc and False) 4636 4637 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4638 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4639 4640 nulls_first = is_nulls_first or False 4641 explicitly_null_ordered = is_nulls_first or is_nulls_last 4642 4643 if ( 4644 not explicitly_null_ordered 4645 and ( 4646 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4647 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4648 ) 4649 and self.dialect.NULL_ORDERING != "nulls_are_last" 4650 ): 4651 nulls_first = True 4652 4653 if self._match_text_seq("WITH", "FILL"): 4654 with_fill = self.expression( 4655 exp.WithFill, 4656 **{ # type: ignore 4657 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4658 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4659 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4660 "interpolate": self._parse_interpolate(), 4661 }, 4662 ) 4663 else: 4664 with_fill = None 4665 4666 return self.expression( 4667 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4668 ) 4669 4670 def _parse_limit_options(self) -> exp.LimitOptions: 4671 percent = self._match(TokenType.PERCENT) 4672 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4673 self._match_text_seq("ONLY") 4674 with_ties = self._match_text_seq("WITH", "TIES") 4675 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4676 4677 def _parse_limit( 4678 self, 4679 this: t.Optional[exp.Expression] = None, 4680 top: bool = False, 4681 skip_limit_token: bool = False, 4682 ) -> t.Optional[exp.Expression]: 4683 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4684 comments = self._prev_comments 4685 if top: 4686 limit_paren = self._match(TokenType.L_PAREN) 4687 expression = self._parse_term() if limit_paren else self._parse_number() 4688 4689 if limit_paren: 4690 self._match_r_paren() 4691 4692 limit_options = self._parse_limit_options() 4693 else: 4694 limit_options = None 4695 expression = self._parse_term() 4696 4697 if self._match(TokenType.COMMA): 4698 offset = expression 4699 expression = self._parse_term() 4700 else: 4701 offset = None 4702 4703 limit_exp = self.expression( 4704 exp.Limit, 4705 this=this, 4706 expression=expression, 4707 offset=offset, 4708 comments=comments, 4709 limit_options=limit_options, 4710 expressions=self._parse_limit_by(), 4711 ) 4712 4713 return limit_exp 4714 4715 if self._match(TokenType.FETCH): 4716 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4717 direction = self._prev.text.upper() if direction else "FIRST" 4718 4719 count = self._parse_field(tokens=self.FETCH_TOKENS) 4720 4721 return self.expression( 4722 exp.Fetch, 4723 direction=direction, 4724 count=count, 4725 limit_options=self._parse_limit_options(), 4726 ) 4727 4728 return this 4729 4730 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4731 if not self._match(TokenType.OFFSET): 4732 return this 4733 4734 count = self._parse_term() 4735 self._match_set((TokenType.ROW, TokenType.ROWS)) 4736 4737 return self.expression( 4738 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4739 ) 4740 4741 def _can_parse_limit_or_offset(self) -> 
bool: 4742 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4743 return False 4744 4745 index = self._index 4746 result = bool( 4747 self._try_parse(self._parse_limit, retreat=True) 4748 or self._try_parse(self._parse_offset, retreat=True) 4749 ) 4750 self._retreat(index) 4751 return result 4752 4753 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4754 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4755 4756 def _parse_locks(self) -> t.List[exp.Lock]: 4757 locks = [] 4758 while True: 4759 update, key = None, None 4760 if self._match_text_seq("FOR", "UPDATE"): 4761 update = True 4762 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4763 "LOCK", "IN", "SHARE", "MODE" 4764 ): 4765 update = False 4766 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4767 update, key = False, True 4768 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4769 update, key = True, True 4770 else: 4771 break 4772 4773 expressions = None 4774 if self._match_text_seq("OF"): 4775 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4776 4777 wait: t.Optional[bool | exp.Expression] = None 4778 if self._match_text_seq("NOWAIT"): 4779 wait = True 4780 elif self._match_text_seq("WAIT"): 4781 wait = self._parse_primary() 4782 elif self._match_text_seq("SKIP", "LOCKED"): 4783 wait = False 4784 4785 locks.append( 4786 self.expression( 4787 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4788 ) 4789 ) 4790 4791 return locks 4792 4793 def parse_set_operation( 4794 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4795 ) -> t.Optional[exp.Expression]: 4796 start = self._index 4797 _, side_token, kind_token = self._parse_join_parts() 4798 4799 side = side_token.text if side_token else None 4800 kind = kind_token.text if kind_token else None 4801 4802 if not self._match_set(self.SET_OPERATIONS): 4803 self._retreat(start) 4804 return None 4805 4806 token_type = self._prev.token_type 4807 4808 if token_type == TokenType.UNION: 4809 operation: t.Type[exp.SetOperation] = exp.Union 4810 elif token_type == TokenType.EXCEPT: 4811 operation = exp.Except 4812 else: 4813 operation = exp.Intersect 4814 4815 comments = self._prev.comments 4816 4817 if self._match(TokenType.DISTINCT): 4818 distinct: t.Optional[bool] = True 4819 elif self._match(TokenType.ALL): 4820 distinct = False 4821 else: 4822 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4823 if distinct is None: 4824 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4825 4826 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4827 "STRICT", "CORRESPONDING" 4828 ) 4829 if self._match_text_seq("CORRESPONDING"): 4830 by_name = True 4831 if not side and not kind: 4832 kind = "INNER" 4833 4834 on_column_list = None 4835 if by_name and self._match_texts(("ON", "BY")): 4836 on_column_list = self._parse_wrapped_csv(self._parse_column) 4837 4838 expression = self._parse_select( 4839 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4840 ) 4841 4842 return self.expression( 4843 operation, 4844 comments=comments, 4845 this=this, 4846 distinct=distinct, 4847 by_name=by_name, 4848 expression=expression, 4849 side=side, 4850 kind=kind, 4851 on=on_column_list, 4852 ) 4853 4854 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4855 while this: 4856 setop = self.parse_set_operation(this) 4857 if not setop: 4858 break 4859 this = setop 4860 4861 
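# Rough illustration (assumed input): the loop above folds consecutive set operations left-to-right,
# so a query such as
#
#   SELECT a FROM t UNION ALL SELECT a FROM u EXCEPT SELECT a FROM v
#
# is built up as roughly
#   exp.Except(this=exp.Union(this=<select t>, expression=<select u>, distinct=False),
#              expression=<select v>)
# where `distinct` comes from an explicit ALL/DISTINCT or the dialect's SET_OP_DISTINCT_BY_DEFAULT.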
if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4862 expression = this.expression 4863 4864 if expression: 4865 for arg in self.SET_OP_MODIFIERS: 4866 expr = expression.args.get(arg) 4867 if expr: 4868 this.set(arg, expr.pop()) 4869 4870 return this 4871 4872 def _parse_expression(self) -> t.Optional[exp.Expression]: 4873 return self._parse_alias(self._parse_assignment()) 4874 4875 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4876 this = self._parse_disjunction() 4877 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4878 # This allows us to parse <non-identifier token> := <expr> 4879 this = exp.column( 4880 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4881 ) 4882 4883 while self._match_set(self.ASSIGNMENT): 4884 if isinstance(this, exp.Column) and len(this.parts) == 1: 4885 this = this.this 4886 4887 this = self.expression( 4888 self.ASSIGNMENT[self._prev.token_type], 4889 this=this, 4890 comments=self._prev_comments, 4891 expression=self._parse_assignment(), 4892 ) 4893 4894 return this 4895 4896 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4897 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4898 4899 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4900 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4901 4902 def _parse_equality(self) -> t.Optional[exp.Expression]: 4903 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4904 4905 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4906 return self._parse_tokens(self._parse_range, self.COMPARISON) 4907 4908 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4909 this = this or self._parse_bitwise() 4910 negate = self._match(TokenType.NOT) 4911 4912 if self._match_set(self.RANGE_PARSERS): 4913 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4914 if not expression: 4915 return this 4916 4917 this = expression 4918 elif self._match(TokenType.ISNULL): 4919 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4920 4921 # Postgres supports ISNULL and NOTNULL for conditions. 
4922 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4923 if self._match(TokenType.NOTNULL): 4924 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4925 this = self.expression(exp.Not, this=this) 4926 4927 if negate: 4928 this = self._negate_range(this) 4929 4930 if self._match(TokenType.IS): 4931 this = self._parse_is(this) 4932 4933 return this 4934 4935 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4936 if not this: 4937 return this 4938 4939 return self.expression(exp.Not, this=this) 4940 4941 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4942 index = self._index - 1 4943 negate = self._match(TokenType.NOT) 4944 4945 if self._match_text_seq("DISTINCT", "FROM"): 4946 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4947 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4948 4949 if self._match(TokenType.JSON): 4950 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4951 4952 if self._match_text_seq("WITH"): 4953 _with = True 4954 elif self._match_text_seq("WITHOUT"): 4955 _with = False 4956 else: 4957 _with = None 4958 4959 unique = self._match(TokenType.UNIQUE) 4960 self._match_text_seq("KEYS") 4961 expression: t.Optional[exp.Expression] = self.expression( 4962 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4963 ) 4964 else: 4965 expression = self._parse_primary() or self._parse_null() 4966 if not expression: 4967 self._retreat(index) 4968 return None 4969 4970 this = self.expression(exp.Is, this=this, expression=expression) 4971 return self.expression(exp.Not, this=this) if negate else this 4972 4973 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4974 unnest = self._parse_unnest(with_alias=False) 4975 if unnest: 4976 this = self.expression(exp.In, this=this, unnest=unnest) 4977 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4978 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4979 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4980 4981 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4982 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4983 else: 4984 this = self.expression(exp.In, this=this, expressions=expressions) 4985 4986 if matched_l_paren: 4987 self._match_r_paren(this) 4988 elif not self._match(TokenType.R_BRACKET, expression=this): 4989 self.raise_error("Expecting ]") 4990 else: 4991 this = self.expression(exp.In, this=this, field=self._parse_column()) 4992 4993 return this 4994 4995 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4996 symmetric = None 4997 if self._match_text_seq("SYMMETRIC"): 4998 symmetric = True 4999 elif self._match_text_seq("ASYMMETRIC"): 5000 symmetric = False 5001 5002 low = self._parse_bitwise() 5003 self._match(TokenType.AND) 5004 high = self._parse_bitwise() 5005 5006 return self.expression( 5007 exp.Between, 5008 this=this, 5009 low=low, 5010 high=high, 5011 symmetric=symmetric, 5012 ) 5013 5014 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5015 if not self._match(TokenType.ESCAPE): 5016 return this 5017 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5018 5019 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5020 index = self._index 5021 5022 if not 
self._match(TokenType.INTERVAL) and match_interval: 5023 return None 5024 5025 if self._match(TokenType.STRING, advance=False): 5026 this = self._parse_primary() 5027 else: 5028 this = self._parse_term() 5029 5030 if not this or ( 5031 isinstance(this, exp.Column) 5032 and not this.table 5033 and not this.this.quoted 5034 and this.name.upper() == "IS" 5035 ): 5036 self._retreat(index) 5037 return None 5038 5039 unit = self._parse_function() or ( 5040 not self._match(TokenType.ALIAS, advance=False) 5041 and self._parse_var(any_token=True, upper=True) 5042 ) 5043 5044 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5045 # each INTERVAL expression into this canonical form so it's easy to transpile 5046 if this and this.is_number: 5047 this = exp.Literal.string(this.to_py()) 5048 elif this and this.is_string: 5049 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5050 if parts and unit: 5051 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5052 unit = None 5053 self._retreat(self._index - 1) 5054 5055 if len(parts) == 1: 5056 this = exp.Literal.string(parts[0][0]) 5057 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5058 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5059 unit = self.expression( 5060 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5061 ) 5062 5063 interval = self.expression(exp.Interval, this=this, unit=unit) 5064 5065 index = self._index 5066 self._match(TokenType.PLUS) 5067 5068 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5069 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5070 return self.expression( 5071 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5072 ) 5073 5074 self._retreat(index) 5075 return interval 5076 5077 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5078 this = self._parse_term() 5079 5080 while True: 5081 if self._match_set(self.BITWISE): 5082 this = self.expression( 5083 self.BITWISE[self._prev.token_type], 5084 this=this, 5085 expression=self._parse_term(), 5086 ) 5087 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5088 this = self.expression( 5089 exp.DPipe, 5090 this=this, 5091 expression=self._parse_term(), 5092 safe=not self.dialect.STRICT_STRING_CONCAT, 5093 ) 5094 elif self._match(TokenType.DQMARK): 5095 this = self.expression( 5096 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5097 ) 5098 elif self._match_pair(TokenType.LT, TokenType.LT): 5099 this = self.expression( 5100 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5101 ) 5102 elif self._match_pair(TokenType.GT, TokenType.GT): 5103 this = self.expression( 5104 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5105 ) 5106 else: 5107 break 5108 5109 return this 5110 5111 def _parse_term(self) -> t.Optional[exp.Expression]: 5112 this = self._parse_factor() 5113 5114 while self._match_set(self.TERM): 5115 klass = self.TERM[self._prev.token_type] 5116 comments = self._prev_comments 5117 expression = self._parse_factor() 5118 5119 this = self.expression(klass, this=this, comments=comments, expression=expression) 5120 5121 if isinstance(this, exp.Collate): 5122 expr = this.expression 5123 5124 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5125 # fallback to Identifier / Var 5126 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5127 ident = expr.this 5128 if 
isinstance(ident, exp.Identifier): 5129 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5130 5131 return this 5132 5133 def _parse_factor(self) -> t.Optional[exp.Expression]: 5134 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5135 this = parse_method() 5136 5137 while self._match_set(self.FACTOR): 5138 klass = self.FACTOR[self._prev.token_type] 5139 comments = self._prev_comments 5140 expression = parse_method() 5141 5142 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5143 self._retreat(self._index - 1) 5144 return this 5145 5146 this = self.expression(klass, this=this, comments=comments, expression=expression) 5147 5148 if isinstance(this, exp.Div): 5149 this.args["typed"] = self.dialect.TYPED_DIVISION 5150 this.args["safe"] = self.dialect.SAFE_DIVISION 5151 5152 return this 5153 5154 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5155 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5156 5157 def _parse_unary(self) -> t.Optional[exp.Expression]: 5158 if self._match_set(self.UNARY_PARSERS): 5159 return self.UNARY_PARSERS[self._prev.token_type](self) 5160 return self._parse_at_time_zone(self._parse_type()) 5161 5162 def _parse_type( 5163 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5164 ) -> t.Optional[exp.Expression]: 5165 interval = parse_interval and self._parse_interval() 5166 if interval: 5167 return interval 5168 5169 index = self._index 5170 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5171 5172 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5173 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5174 if isinstance(data_type, exp.Cast): 5175 # This constructor can contain ops directly after it, for instance struct unnesting: 5176 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5177 return self._parse_column_ops(data_type) 5178 5179 if data_type: 5180 index2 = self._index 5181 this = self._parse_primary() 5182 5183 if isinstance(this, exp.Literal): 5184 literal = this.name 5185 this = self._parse_column_ops(this) 5186 5187 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5188 if parser: 5189 return parser(self, this, data_type) 5190 5191 if ( 5192 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5193 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5194 and TIME_ZONE_RE.search(literal) 5195 ): 5196 data_type = exp.DataType.build("TIMESTAMPTZ") 5197 5198 return self.expression(exp.Cast, this=this, to=data_type) 5199 5200 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5201 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5202 # 5203 # If the index difference here is greater than 1, that means the parser itself must have 5204 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5205 # 5206 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5207 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5208 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5209 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5210 # 5211 # In these cases, we don't really want to return the converted type, but instead retreat 5212 # and try to parse a Column or Identifier in the section below. 
5213 if data_type.expressions and index2 - index > 1: 5214 self._retreat(index2) 5215 return self._parse_column_ops(data_type) 5216 5217 self._retreat(index) 5218 5219 if fallback_to_identifier: 5220 return self._parse_id_var() 5221 5222 this = self._parse_column() 5223 return this and self._parse_column_ops(this) 5224 5225 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5226 this = self._parse_type() 5227 if not this: 5228 return None 5229 5230 if isinstance(this, exp.Column) and not this.table: 5231 this = exp.var(this.name.upper()) 5232 5233 return self.expression( 5234 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5235 ) 5236 5237 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5238 type_name = identifier.name 5239 5240 while self._match(TokenType.DOT): 5241 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5242 5243 return exp.DataType.build(type_name, udt=True) 5244 5245 def _parse_types( 5246 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5247 ) -> t.Optional[exp.Expression]: 5248 index = self._index 5249 5250 this: t.Optional[exp.Expression] = None 5251 prefix = self._match_text_seq("SYSUDTLIB", ".") 5252 5253 if not self._match_set(self.TYPE_TOKENS): 5254 identifier = allow_identifiers and self._parse_id_var( 5255 any_token=False, tokens=(TokenType.VAR,) 5256 ) 5257 if isinstance(identifier, exp.Identifier): 5258 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5259 5260 if len(tokens) != 1: 5261 self.raise_error("Unexpected identifier", self._prev) 5262 5263 if tokens[0].token_type in self.TYPE_TOKENS: 5264 self._prev = tokens[0] 5265 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5266 this = self._parse_user_defined_type(identifier) 5267 else: 5268 self._retreat(self._index - 1) 5269 return None 5270 else: 5271 return None 5272 5273 type_token = self._prev.token_type 5274 5275 if type_token == TokenType.PSEUDO_TYPE: 5276 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5277 5278 if type_token == TokenType.OBJECT_IDENTIFIER: 5279 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5280 5281 # https://materialize.com/docs/sql/types/map/ 5282 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5283 key_type = self._parse_types( 5284 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5285 ) 5286 if not self._match(TokenType.FARROW): 5287 self._retreat(index) 5288 return None 5289 5290 value_type = self._parse_types( 5291 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5292 ) 5293 if not self._match(TokenType.R_BRACKET): 5294 self._retreat(index) 5295 return None 5296 5297 return exp.DataType( 5298 this=exp.DataType.Type.MAP, 5299 expressions=[key_type, value_type], 5300 nested=True, 5301 prefix=prefix, 5302 ) 5303 5304 nested = type_token in self.NESTED_TYPE_TOKENS 5305 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5306 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5307 expressions = None 5308 maybe_func = False 5309 5310 if self._match(TokenType.L_PAREN): 5311 if is_struct: 5312 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5313 elif nested: 5314 expressions = self._parse_csv( 5315 lambda: self._parse_types( 5316 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5317 ) 5318 ) 5319 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5320 this = expressions[0] 5321 this.set("nullable", True) 5322 self._match_r_paren() 5323 return this 5324 elif type_token in self.ENUM_TYPE_TOKENS: 5325 expressions = self._parse_csv(self._parse_equality) 5326 elif is_aggregate: 5327 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5328 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5329 ) 5330 if not func_or_ident: 5331 return None 5332 expressions = [func_or_ident] 5333 if self._match(TokenType.COMMA): 5334 expressions.extend( 5335 self._parse_csv( 5336 lambda: self._parse_types( 5337 check_func=check_func, 5338 schema=schema, 5339 allow_identifiers=allow_identifiers, 5340 ) 5341 ) 5342 ) 5343 else: 5344 expressions = self._parse_csv(self._parse_type_size) 5345 5346 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5347 if type_token == TokenType.VECTOR and len(expressions) == 2: 5348 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5349 5350 if not expressions or not self._match(TokenType.R_PAREN): 5351 self._retreat(index) 5352 return None 5353 5354 maybe_func = True 5355 5356 values: t.Optional[t.List[exp.Expression]] = None 5357 5358 if nested and self._match(TokenType.LT): 5359 if is_struct: 5360 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5361 else: 5362 expressions = self._parse_csv( 5363 lambda: self._parse_types( 5364 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5365 ) 5366 ) 5367 5368 if not self._match(TokenType.GT): 5369 self.raise_error("Expecting >") 5370 5371 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5372 values = self._parse_csv(self._parse_assignment) 5373 if not values and is_struct: 5374 values = None 5375 self._retreat(self._index - 1) 5376 else: 5377 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5378 5379 if type_token in self.TIMESTAMPS: 5380 if self._match_text_seq("WITH", "TIME", "ZONE"): 5381 maybe_func = False 5382 tz_type = ( 5383 exp.DataType.Type.TIMETZ 5384 if type_token in self.TIMES 5385 else exp.DataType.Type.TIMESTAMPTZ 5386 ) 5387 this = exp.DataType(this=tz_type, expressions=expressions) 5388 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5389 maybe_func = False 5390 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5391 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5392 maybe_func = False 5393 elif type_token == TokenType.INTERVAL: 5394 unit = self._parse_var(upper=True) 5395 if unit: 5396 if self._match_text_seq("TO"): 5397 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5398 5399 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5400 else: 5401 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5402 elif type_token == TokenType.VOID: 5403 this = exp.DataType(this=exp.DataType.Type.NULL) 5404 5405 if maybe_func and check_func: 5406 index2 = self._index 5407 peek = self._parse_string() 5408 5409 if not peek: 5410 self._retreat(index) 5411 return None 5412 5413 self._retreat(index2) 5414 5415 if not this: 5416 if self._match_text_seq("UNSIGNED"): 5417 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5418 if not unsigned_type_token: 5419 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5420 5421 type_token = unsigned_type_token or type_token 5422 5423 this = exp.DataType( 5424 this=exp.DataType.Type[type_token.value], 5425 expressions=expressions, 
5426 nested=nested, 5427 prefix=prefix, 5428 ) 5429 5430 # Empty arrays/structs are allowed 5431 if values is not None: 5432 cls = exp.Struct if is_struct else exp.Array 5433 this = exp.cast(cls(expressions=values), this, copy=False) 5434 5435 elif expressions: 5436 this.set("expressions", expressions) 5437 5438 # https://materialize.com/docs/sql/types/list/#type-name 5439 while self._match(TokenType.LIST): 5440 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5441 5442 index = self._index 5443 5444 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5445 matched_array = self._match(TokenType.ARRAY) 5446 5447 while self._curr: 5448 datatype_token = self._prev.token_type 5449 matched_l_bracket = self._match(TokenType.L_BRACKET) 5450 5451 if (not matched_l_bracket and not matched_array) or ( 5452 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5453 ): 5454 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5455 # not to be confused with the fixed size array parsing 5456 break 5457 5458 matched_array = False 5459 values = self._parse_csv(self._parse_assignment) or None 5460 if ( 5461 values 5462 and not schema 5463 and ( 5464 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5465 ) 5466 ): 5467 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5468 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5469 self._retreat(index) 5470 break 5471 5472 this = exp.DataType( 5473 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5474 ) 5475 self._match(TokenType.R_BRACKET) 5476 5477 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5478 converter = self.TYPE_CONVERTERS.get(this.this) 5479 if converter: 5480 this = converter(t.cast(exp.DataType, this)) 5481 5482 return this 5483 5484 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5485 index = self._index 5486 5487 if ( 5488 self._curr 5489 and self._next 5490 and self._curr.token_type in self.TYPE_TOKENS 5491 and self._next.token_type in self.TYPE_TOKENS 5492 ): 5493 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5494 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5495 this = self._parse_id_var() 5496 else: 5497 this = ( 5498 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5499 or self._parse_id_var() 5500 ) 5501 5502 self._match(TokenType.COLON) 5503 5504 if ( 5505 type_required 5506 and not isinstance(this, exp.DataType) 5507 and not self._match_set(self.TYPE_TOKENS, advance=False) 5508 ): 5509 self._retreat(index) 5510 return self._parse_types() 5511 5512 return self._parse_column_def(this) 5513 5514 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5515 if not self._match_text_seq("AT", "TIME", "ZONE"): 5516 return this 5517 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5518 5519 def _parse_column(self) -> t.Optional[exp.Expression]: 5520 this = self._parse_column_reference() 5521 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5522 5523 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5524 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5525 5526 return column 5527 5528 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5529 this = self._parse_field() 5530 if ( 5531 not this 5532 and self._match(TokenType.VALUES, advance=False) 5533 and self.VALUES_FOLLOWED_BY_PAREN 5534 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5535 ): 5536 this = self._parse_id_var() 5537 5538 if isinstance(this, exp.Identifier): 5539 # We bubble up comments from the Identifier to the Column 5540 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5541 5542 return this 5543 5544 def _parse_colon_as_variant_extract( 5545 self, this: t.Optional[exp.Expression] 5546 ) -> t.Optional[exp.Expression]: 5547 casts = [] 5548 json_path = [] 5549 escape = None 5550 5551 while self._match(TokenType.COLON): 5552 start_index = self._index 5553 5554 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5555 path = self._parse_column_ops( 5556 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5557 ) 5558 5559 # The cast :: operator has a lower precedence than the extraction operator :, so 5560 # we rearrange the AST appropriately to avoid casting the JSON path 5561 while isinstance(path, exp.Cast): 5562 casts.append(path.to) 5563 path = path.this 5564 5565 if casts: 5566 dcolon_offset = next( 5567 i 5568 for i, t in enumerate(self._tokens[start_index:]) 5569 if t.token_type == TokenType.DCOLON 5570 ) 5571 end_token = self._tokens[start_index + dcolon_offset - 1] 5572 else: 5573 end_token = self._prev 5574 5575 if path: 5576 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5577 # it'll roundtrip to a string literal in GET_PATH 5578 if isinstance(path, exp.Identifier) and path.quoted: 5579 escape = True 5580 5581 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5582 5583 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5584 # Databricks transforms it back to the colon/dot notation 5585 if json_path: 5586 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5587 5588 if json_path_expr: 5589 json_path_expr.set("escape", escape) 5590 5591 this = self.expression( 5592 exp.JSONExtract, 5593 this=this, 5594 expression=json_path_expr, 5595 variant_extract=True, 5596 ) 5597 5598 while casts: 5599 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5600 5601 return this 5602 5603 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5604 return self._parse_types() 5605 5606 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5607 this = self._parse_bracket(this) 5608 5609 while self._match_set(self.COLUMN_OPERATORS): 5610 op_token = self._prev.token_type 5611 op = self.COLUMN_OPERATORS.get(op_token) 5612 5613 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5614 field = self._parse_dcolon() 5615 if not field: 5616 self.raise_error("Expected type") 5617 elif op and self._curr: 5618 field = self._parse_column_reference() or self._parse_bracket() 5619 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5620 field = self._parse_column_ops(field) 5621 else: 5622 field = self._parse_field(any_token=True, anonymous_func=True) 5623 5624 # Function calls can be qualified, e.g., x.y.FOO() 5625 # This converts the final AST to a series of Dots leading to the function call 5626 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5627 if isinstance(field, (exp.Func, exp.Window)) and this: 5628 this = this.transform( 5629 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5630 ) 5631 5632 if op: 5633 this = op(self, this, field) 5634 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5635 this = self.expression( 5636 exp.Column, 5637 comments=this.comments, 5638 this=field, 5639 table=this.this, 5640 db=this.args.get("table"), 5641 catalog=this.args.get("db"), 5642 ) 5643 elif isinstance(field, exp.Window): 5644 # Move the exp.Dot's to the window's function 5645 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5646 field.set("this", window_func) 5647 this = field 5648 else: 5649 this = self.expression(exp.Dot, this=this, expression=field) 5650 5651 if field and field.comments: 5652 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5653 5654 this = self._parse_bracket(this) 5655 5656 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5657 5658 def _parse_paren(self) -> t.Optional[exp.Expression]: 5659 if not self._match(TokenType.L_PAREN): 5660 return None 5661 5662 comments = self._prev_comments 5663 query = self._parse_select() 5664 5665 if query: 5666 expressions = [query] 5667 else: 5668 expressions = self._parse_expressions() 5669 5670 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5671 5672 if not this and self._match(TokenType.R_PAREN, advance=False): 5673 this = self.expression(exp.Tuple) 5674 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5675 this = 
self._parse_subquery(this=this, parse_alias=False) 5676 elif isinstance(this, exp.Subquery): 5677 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5678 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5679 this = self.expression(exp.Tuple, expressions=expressions) 5680 else: 5681 this = self.expression(exp.Paren, this=this) 5682 5683 if this: 5684 this.add_comments(comments) 5685 5686 self._match_r_paren(expression=this) 5687 return this 5688 5689 def _parse_primary(self) -> t.Optional[exp.Expression]: 5690 if self._match_set(self.PRIMARY_PARSERS): 5691 token_type = self._prev.token_type 5692 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5693 5694 if token_type == TokenType.STRING: 5695 expressions = [primary] 5696 while self._match(TokenType.STRING): 5697 expressions.append(exp.Literal.string(self._prev.text)) 5698 5699 if len(expressions) > 1: 5700 return self.expression(exp.Concat, expressions=expressions) 5701 5702 return primary 5703 5704 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5705 return exp.Literal.number(f"0.{self._prev.text}") 5706 5707 return self._parse_paren() 5708 5709 def _parse_field( 5710 self, 5711 any_token: bool = False, 5712 tokens: t.Optional[t.Collection[TokenType]] = None, 5713 anonymous_func: bool = False, 5714 ) -> t.Optional[exp.Expression]: 5715 if anonymous_func: 5716 field = ( 5717 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5718 or self._parse_primary() 5719 ) 5720 else: 5721 field = self._parse_primary() or self._parse_function( 5722 anonymous=anonymous_func, any_token=any_token 5723 ) 5724 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5725 5726 def _parse_function( 5727 self, 5728 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5729 anonymous: bool = False, 5730 optional_parens: bool = True, 5731 any_token: bool = False, 5732 ) -> t.Optional[exp.Expression]: 5733 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5734 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5735 fn_syntax = False 5736 if ( 5737 self._match(TokenType.L_BRACE, advance=False) 5738 and self._next 5739 and self._next.text.upper() == "FN" 5740 ): 5741 self._advance(2) 5742 fn_syntax = True 5743 5744 func = self._parse_function_call( 5745 functions=functions, 5746 anonymous=anonymous, 5747 optional_parens=optional_parens, 5748 any_token=any_token, 5749 ) 5750 5751 if fn_syntax: 5752 self._match(TokenType.R_BRACE) 5753 5754 return func 5755 5756 def _parse_function_call( 5757 self, 5758 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5759 anonymous: bool = False, 5760 optional_parens: bool = True, 5761 any_token: bool = False, 5762 ) -> t.Optional[exp.Expression]: 5763 if not self._curr: 5764 return None 5765 5766 comments = self._curr.comments 5767 token = self._curr 5768 token_type = self._curr.token_type 5769 this = self._curr.text 5770 upper = this.upper() 5771 5772 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5773 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5774 self._advance() 5775 return self._parse_window(parser(self)) 5776 5777 if not self._next or self._next.token_type != TokenType.L_PAREN: 5778 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5779 self._advance() 5780 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5781 5782 return None 5783 5784 if any_token: 5785 if token_type in self.RESERVED_TOKENS: 
5786 return None 5787 elif token_type not in self.FUNC_TOKENS: 5788 return None 5789 5790 self._advance(2) 5791 5792 parser = self.FUNCTION_PARSERS.get(upper) 5793 if parser and not anonymous: 5794 this = parser(self) 5795 else: 5796 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5797 5798 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5799 this = self.expression( 5800 subquery_predicate, comments=comments, this=self._parse_select() 5801 ) 5802 self._match_r_paren() 5803 return this 5804 5805 if functions is None: 5806 functions = self.FUNCTIONS 5807 5808 function = functions.get(upper) 5809 known_function = function and not anonymous 5810 5811 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5812 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5813 5814 post_func_comments = self._curr and self._curr.comments 5815 if known_function and post_func_comments: 5816 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5817 # call we'll construct it as exp.Anonymous, even if it's "known" 5818 if any( 5819 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5820 for comment in post_func_comments 5821 ): 5822 known_function = False 5823 5824 if alias and known_function: 5825 args = self._kv_to_prop_eq(args) 5826 5827 if known_function: 5828 func_builder = t.cast(t.Callable, function) 5829 5830 if "dialect" in func_builder.__code__.co_varnames: 5831 func = func_builder(args, dialect=self.dialect) 5832 else: 5833 func = func_builder(args) 5834 5835 func = self.validate_expression(func, args) 5836 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5837 func.meta["name"] = this 5838 5839 this = func 5840 else: 5841 if token_type == TokenType.IDENTIFIER: 5842 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5843 5844 this = self.expression(exp.Anonymous, this=this, expressions=args) 5845 this = this.update_positions(token) 5846 5847 if isinstance(this, exp.Expression): 5848 this.add_comments(comments) 5849 5850 self._match_r_paren(this) 5851 return self._parse_window(this) 5852 5853 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5854 return expression 5855 5856 def _kv_to_prop_eq( 5857 self, expressions: t.List[exp.Expression], parse_map: bool = False 5858 ) -> t.List[exp.Expression]: 5859 transformed = [] 5860 5861 for index, e in enumerate(expressions): 5862 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5863 if isinstance(e, exp.Alias): 5864 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5865 5866 if not isinstance(e, exp.PropertyEQ): 5867 e = self.expression( 5868 exp.PropertyEQ, 5869 this=e.this if parse_map else exp.to_identifier(e.this.name), 5870 expression=e.expression, 5871 ) 5872 5873 if isinstance(e.this, exp.Column): 5874 e.this.replace(e.this.this) 5875 else: 5876 e = self._to_prop_eq(e, index) 5877 5878 transformed.append(e) 5879 5880 return transformed 5881 5882 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5883 return self._parse_statement() 5884 5885 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5886 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5887 5888 def _parse_user_defined_function( 5889 self, kind: t.Optional[TokenType] = None 5890 ) -> t.Optional[exp.Expression]: 5891 this = self._parse_table_parts(schema=True) 5892 5893 if not self._match(TokenType.L_PAREN): 5894 return this 5895 5896 
expressions = self._parse_csv(self._parse_function_parameter) 5897 self._match_r_paren() 5898 return self.expression( 5899 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5900 ) 5901 5902 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5903 literal = self._parse_primary() 5904 if literal: 5905 return self.expression(exp.Introducer, this=token.text, expression=literal) 5906 5907 return self._identifier_expression(token) 5908 5909 def _parse_session_parameter(self) -> exp.SessionParameter: 5910 kind = None 5911 this = self._parse_id_var() or self._parse_primary() 5912 5913 if this and self._match(TokenType.DOT): 5914 kind = this.name 5915 this = self._parse_var() or self._parse_primary() 5916 5917 return self.expression(exp.SessionParameter, this=this, kind=kind) 5918 5919 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5920 return self._parse_id_var() 5921 5922 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5923 index = self._index 5924 5925 if self._match(TokenType.L_PAREN): 5926 expressions = t.cast( 5927 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5928 ) 5929 5930 if not self._match(TokenType.R_PAREN): 5931 self._retreat(index) 5932 else: 5933 expressions = [self._parse_lambda_arg()] 5934 5935 if self._match_set(self.LAMBDAS): 5936 return self.LAMBDAS[self._prev.token_type](self, expressions) 5937 5938 self._retreat(index) 5939 5940 this: t.Optional[exp.Expression] 5941 5942 if self._match(TokenType.DISTINCT): 5943 this = self.expression( 5944 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5945 ) 5946 else: 5947 this = self._parse_select_or_expression(alias=alias) 5948 5949 return self._parse_limit( 5950 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5951 ) 5952 5953 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5954 index = self._index 5955 if not self._match(TokenType.L_PAREN): 5956 return this 5957 5958 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5959 # expr can be of both types 5960 if self._match_set(self.SELECT_START_TOKENS): 5961 self._retreat(index) 5962 return this 5963 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5964 self._match_r_paren() 5965 return self.expression(exp.Schema, this=this, expressions=args) 5966 5967 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5968 return self._parse_column_def(self._parse_field(any_token=True)) 5969 5970 def _parse_column_def( 5971 self, this: t.Optional[exp.Expression], computed_column: bool = True 5972 ) -> t.Optional[exp.Expression]: 5973 # column defs are not really columns, they're identifiers 5974 if isinstance(this, exp.Column): 5975 this = this.this 5976 5977 if not computed_column: 5978 self._match(TokenType.ALIAS) 5979 5980 kind = self._parse_types(schema=True) 5981 5982 if self._match_text_seq("FOR", "ORDINALITY"): 5983 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5984 5985 constraints: t.List[exp.Expression] = [] 5986 5987 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5988 ("ALIAS", "MATERIALIZED") 5989 ): 5990 persisted = self._prev.text.upper() == "MATERIALIZED" 5991 constraint_kind = exp.ComputedColumnConstraint( 5992 this=self._parse_assignment(), 5993 persisted=persisted or self._match_text_seq("PERSISTED"), 5994 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5995 ) 5996 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5997 elif ( 5998 kind 5999 and self._match(TokenType.ALIAS, advance=False) 6000 and ( 6001 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6002 or (self._next and self._next.token_type == TokenType.L_PAREN) 6003 ) 6004 ): 6005 self._advance() 6006 constraints.append( 6007 self.expression( 6008 exp.ColumnConstraint, 6009 kind=exp.ComputedColumnConstraint( 6010 this=self._parse_disjunction(), 6011 persisted=self._match_texts(("STORED", "VIRTUAL")) 6012 and self._prev.text.upper() == "STORED", 6013 ), 6014 ) 6015 ) 6016 6017 while True: 6018 constraint = self._parse_column_constraint() 6019 if not constraint: 6020 break 6021 constraints.append(constraint) 6022 6023 if not kind and not constraints: 6024 return this 6025 6026 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6027 6028 def _parse_auto_increment( 6029 self, 6030 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6031 start = None 6032 increment = None 6033 order = None 6034 6035 if self._match(TokenType.L_PAREN, advance=False): 6036 args = self._parse_wrapped_csv(self._parse_bitwise) 6037 start = seq_get(args, 0) 6038 increment = seq_get(args, 1) 6039 elif self._match_text_seq("START"): 6040 start = self._parse_bitwise() 6041 self._match_text_seq("INCREMENT") 6042 increment = self._parse_bitwise() 6043 if self._match_text_seq("ORDER"): 6044 order = True 6045 elif self._match_text_seq("NOORDER"): 6046 order = False 6047 6048 if start and increment: 6049 return exp.GeneratedAsIdentityColumnConstraint( 6050 start=start, increment=increment, this=False, order=order 6051 ) 6052 6053 return exp.AutoIncrementColumnConstraint() 6054 6055 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6056 if not self._match_text_seq("REFRESH"): 6057 self._retreat(self._index - 1) 6058 return None 6059 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6060 6061 def _parse_compress(self) -> exp.CompressColumnConstraint: 6062 if 
self._match(TokenType.L_PAREN, advance=False): 6063 return self.expression( 6064 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6065 ) 6066 6067 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6068 6069 def _parse_generated_as_identity( 6070 self, 6071 ) -> ( 6072 exp.GeneratedAsIdentityColumnConstraint 6073 | exp.ComputedColumnConstraint 6074 | exp.GeneratedAsRowColumnConstraint 6075 ): 6076 if self._match_text_seq("BY", "DEFAULT"): 6077 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6078 this = self.expression( 6079 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6080 ) 6081 else: 6082 self._match_text_seq("ALWAYS") 6083 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6084 6085 self._match(TokenType.ALIAS) 6086 6087 if self._match_text_seq("ROW"): 6088 start = self._match_text_seq("START") 6089 if not start: 6090 self._match(TokenType.END) 6091 hidden = self._match_text_seq("HIDDEN") 6092 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6093 6094 identity = self._match_text_seq("IDENTITY") 6095 6096 if self._match(TokenType.L_PAREN): 6097 if self._match(TokenType.START_WITH): 6098 this.set("start", self._parse_bitwise()) 6099 if self._match_text_seq("INCREMENT", "BY"): 6100 this.set("increment", self._parse_bitwise()) 6101 if self._match_text_seq("MINVALUE"): 6102 this.set("minvalue", self._parse_bitwise()) 6103 if self._match_text_seq("MAXVALUE"): 6104 this.set("maxvalue", self._parse_bitwise()) 6105 6106 if self._match_text_seq("CYCLE"): 6107 this.set("cycle", True) 6108 elif self._match_text_seq("NO", "CYCLE"): 6109 this.set("cycle", False) 6110 6111 if not identity: 6112 this.set("expression", self._parse_range()) 6113 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6114 args = self._parse_csv(self._parse_bitwise) 6115 this.set("start", seq_get(args, 0)) 6116 this.set("increment", seq_get(args, 1)) 6117 6118 self._match_r_paren() 6119 6120 return this 6121 6122 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6123 self._match_text_seq("LENGTH") 6124 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6125 6126 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6127 if self._match_text_seq("NULL"): 6128 return self.expression(exp.NotNullColumnConstraint) 6129 if self._match_text_seq("CASESPECIFIC"): 6130 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6131 if self._match_text_seq("FOR", "REPLICATION"): 6132 return self.expression(exp.NotForReplicationColumnConstraint) 6133 6134 # Unconsume the `NOT` token 6135 self._retreat(self._index - 1) 6136 return None 6137 6138 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6139 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6140 6141 procedure_option_follows = ( 6142 self._match(TokenType.WITH, advance=False) 6143 and self._next 6144 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6145 ) 6146 6147 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6148 return self.expression( 6149 exp.ColumnConstraint, 6150 this=this, 6151 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6152 ) 6153 6154 return this 6155 6156 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6157 if not self._match(TokenType.CONSTRAINT): 6158 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6159 6160 return self.expression( 6161 exp.Constraint, 6162 this=self._parse_id_var(), 6163 expressions=self._parse_unnamed_constraints(), 6164 ) 6165 6166 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6167 constraints = [] 6168 while True: 6169 constraint = self._parse_unnamed_constraint() or self._parse_function() 6170 if not constraint: 6171 break 6172 constraints.append(constraint) 6173 6174 return constraints 6175 6176 def _parse_unnamed_constraint( 6177 self, constraints: t.Optional[t.Collection[str]] = None 6178 ) -> t.Optional[exp.Expression]: 6179 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6180 constraints or self.CONSTRAINT_PARSERS 6181 ): 6182 return None 6183 6184 constraint = self._prev.text.upper() 6185 if constraint not in self.CONSTRAINT_PARSERS: 6186 self.raise_error(f"No parser found for schema constraint {constraint}.") 6187 6188 return self.CONSTRAINT_PARSERS[constraint](self) 6189 6190 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6191 return self._parse_id_var(any_token=False) 6192 6193 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6194 self._match_text_seq("KEY") 6195 return self.expression( 6196 exp.UniqueColumnConstraint, 6197 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6198 this=self._parse_schema(self._parse_unique_key()), 6199 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6200 on_conflict=self._parse_on_conflict(), 6201 options=self._parse_key_constraint_options(), 6202 ) 6203 6204 def _parse_key_constraint_options(self) -> t.List[str]: 6205 options = [] 6206 while True: 6207 if not self._curr: 6208 break 6209 6210 if self._match(TokenType.ON): 6211 action = None 6212 on = self._advance_any() and self._prev.text 6213 6214 if self._match_text_seq("NO", "ACTION"): 6215 action = "NO ACTION" 6216 elif self._match_text_seq("CASCADE"): 6217 action = "CASCADE" 6218 elif self._match_text_seq("RESTRICT"): 6219 action = "RESTRICT" 6220 elif self._match_pair(TokenType.SET, TokenType.NULL): 6221 action = "SET NULL" 6222 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6223 action = "SET DEFAULT" 6224 else: 6225 self.raise_error("Invalid key constraint") 6226 6227 options.append(f"ON {on} {action}") 6228 else: 6229 var = self._parse_var_from_options( 6230 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6231 ) 6232 if not var: 6233 break 6234 options.append(var.name) 6235 6236 return options 6237 6238 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6239 if match and not self._match(TokenType.REFERENCES): 6240 return None 6241 6242 expressions = None 6243 this = self._parse_table(schema=True) 6244 options = self._parse_key_constraint_options() 6245 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6246 6247 def _parse_foreign_key(self) -> exp.ForeignKey: 6248 expressions = ( 6249 self._parse_wrapped_id_vars() 6250 if not self._match(TokenType.REFERENCES, advance=False) 6251 else None 6252 ) 6253 reference = self._parse_references() 6254 on_options = {} 6255 6256 while self._match(TokenType.ON): 6257 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6258 self.raise_error("Expected DELETE or UPDATE") 6259 6260 kind = self._prev.text.lower() 6261 6262 if self._match_text_seq("NO", "ACTION"): 6263 action = "NO ACTION" 6264 elif self._match(TokenType.SET): 6265 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6266 action = "SET " + self._prev.text.upper() 6267 else: 6268 self._advance() 6269 action = self._prev.text.upper() 6270 6271 on_options[kind] = action 6272 6273 return self.expression( 6274 exp.ForeignKey, 6275 expressions=expressions, 6276 reference=reference, 6277 options=self._parse_key_constraint_options(), 6278 **on_options, # type: ignore 6279 ) 6280 6281 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6282 return self._parse_ordered() or self._parse_field() 6283 6284 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6285 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6286 self._retreat(self._index - 1) 6287 return None 6288 6289 id_vars = self._parse_wrapped_id_vars() 6290 return self.expression( 6291 exp.PeriodForSystemTimeConstraint, 6292 this=seq_get(id_vars, 0), 6293 expression=seq_get(id_vars, 1), 6294 ) 6295 6296 def _parse_primary_key( 6297 self, wrapped_optional: bool = False, in_props: bool = False 6298 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6299 desc = ( 6300 self._match_set((TokenType.ASC, TokenType.DESC)) 6301 and self._prev.token_type == TokenType.DESC 6302 ) 6303 6304 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6305 return self.expression( 6306 exp.PrimaryKeyColumnConstraint, 6307 desc=desc, 6308 options=self._parse_key_constraint_options(), 6309 ) 6310 6311 expressions = self._parse_wrapped_csv( 6312 self._parse_primary_key_part, optional=wrapped_optional 6313 ) 6314 6315 return self.expression( 6316 exp.PrimaryKey, 6317 expressions=expressions, 6318 include=self._parse_index_params(), 6319 options=self._parse_key_constraint_options(), 6320 ) 6321 6322 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6323 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6324 6325 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6326 """ 6327 Parses a datetime literal in ODBC format into the corresponding expression type; for 6328 example, `{d'yyyy-mm-dd'}` is parsed as a `Date` expression, exactly the same as 6329 `DATE('yyyy-mm-dd')`.
6330 6331 Reference: 6332 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6333 """ 6334 self._match(TokenType.VAR) 6335 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6336 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6337 if not self._match(TokenType.R_BRACE): 6338 self.raise_error("Expected }") 6339 return expression 6340 6341 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6342 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6343 return this 6344 6345 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6346 map_token = seq_get(self._tokens, self._index - 2) 6347 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6348 else: 6349 parse_map = False 6350 6351 bracket_kind = self._prev.token_type 6352 if ( 6353 bracket_kind == TokenType.L_BRACE 6354 and self._curr 6355 and self._curr.token_type == TokenType.VAR 6356 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6357 ): 6358 return self._parse_odbc_datetime_literal() 6359 6360 expressions = self._parse_csv( 6361 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6362 ) 6363 6364 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6365 self.raise_error("Expected ]") 6366 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6367 self.raise_error("Expected }") 6368 6369 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6370 if bracket_kind == TokenType.L_BRACE: 6371 this = self.expression( 6372 exp.Struct, 6373 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6374 ) 6375 elif not this: 6376 this = build_array_constructor( 6377 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6378 ) 6379 else: 6380 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6381 if constructor_type: 6382 return build_array_constructor( 6383 constructor_type, 6384 args=expressions, 6385 bracket_kind=bracket_kind, 6386 dialect=self.dialect, 6387 ) 6388 6389 expressions = apply_index_offset( 6390 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6391 ) 6392 this = self.expression( 6393 exp.Bracket, 6394 this=this, 6395 expressions=expressions, 6396 comments=this.pop_comments(), 6397 ) 6398 6399 self._add_comments(this) 6400 return self._parse_bracket(this) 6401 6402 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6403 if self._match(TokenType.COLON): 6404 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6405 return this 6406 6407 def _parse_case(self) -> t.Optional[exp.Expression]: 6408 ifs = [] 6409 default = None 6410 6411 comments = self._prev_comments 6412 expression = self._parse_assignment() 6413 6414 while self._match(TokenType.WHEN): 6415 this = self._parse_assignment() 6416 self._match(TokenType.THEN) 6417 then = self._parse_assignment() 6418 ifs.append(self.expression(exp.If, this=this, true=then)) 6419 6420 if self._match(TokenType.ELSE): 6421 default = self._parse_assignment() 6422 6423 if not self._match(TokenType.END): 6424 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6425 default = exp.column("interval") 6426 else: 6427 self.raise_error("Expected END after CASE", self._prev) 6428 6429 return self.expression( 6430 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6431 ) 6432 6433 def _parse_if(self) -> t.Optional[exp.Expression]: 6434 if self._match(TokenType.L_PAREN): 6435 args = self._parse_csv( 6436 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6437 ) 6438 this = self.validate_expression(exp.If.from_arg_list(args), args) 6439 self._match_r_paren() 6440 else: 6441 index = self._index - 1 6442 6443 if self.NO_PAREN_IF_COMMANDS and index == 0: 6444 return self._parse_as_command(self._prev) 6445 6446 condition = self._parse_assignment() 6447 6448 if not condition: 6449 self._retreat(index) 6450 return None 6451 6452 self._match(TokenType.THEN) 6453 true = self._parse_assignment() 6454 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6455 self._match(TokenType.END) 6456 this = self.expression(exp.If, this=condition, true=true, false=false) 6457 6458 return this 6459 6460 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6461 if not self._match_text_seq("VALUE", "FOR"): 6462 self._retreat(self._index - 1) 6463 return None 6464 6465 return self.expression( 6466 exp.NextValueFor, 6467 this=self._parse_column(), 6468 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6469 ) 6470 6471 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6472 this = self._parse_function() or self._parse_var_or_string(upper=True) 6473 6474 if self._match(TokenType.FROM): 6475 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6476 6477 if not self._match(TokenType.COMMA): 6478 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6479 6480 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6481 6482 def _parse_gap_fill(self) -> exp.GapFill: 6483 self._match(TokenType.TABLE) 6484 this = self._parse_table() 6485 6486 self._match(TokenType.COMMA) 6487 args = [this, *self._parse_csv(self._parse_lambda)] 6488 6489 gap_fill = exp.GapFill.from_arg_list(args) 6490 return self.validate_expression(gap_fill, args) 6491 6492 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6493 this = self._parse_assignment() 6494 6495 if not self._match(TokenType.ALIAS): 6496 if self._match(TokenType.COMMA): 6497 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6498 6499 self.raise_error("Expected AS after CAST") 6500 6501 fmt = None 6502 to = self._parse_types() 6503 6504 default = self._match(TokenType.DEFAULT) 6505 if default: 6506 default = self._parse_bitwise() 6507 self._match_text_seq("ON", "CONVERSION", "ERROR") 6508 6509 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6510 fmt_string = self._parse_string() 6511 fmt = self._parse_at_time_zone(fmt_string) 6512 6513 if not to: 6514 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6515 if to.this in exp.DataType.TEMPORAL_TYPES: 6516 this = self.expression( 6517 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6518 this=this, 6519 format=exp.Literal.string( 6520 format_time( 6521 fmt_string.this if fmt_string else "", 6522 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6523 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6524 ) 6525 ), 6526 safe=safe, 6527 ) 6528 6529 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6530 this.set("zone", fmt.args["zone"]) 6531 return this 6532 elif not to: 6533 self.raise_error("Expected TYPE after CAST") 6534 elif isinstance(to, exp.Identifier): 6535 to = exp.DataType.build(to.name, udt=True) 6536 
elif to.this == exp.DataType.Type.CHAR: 6537 if self._match(TokenType.CHARACTER_SET): 6538 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6539 6540 return self.expression( 6541 exp.Cast if strict else exp.TryCast, 6542 this=this, 6543 to=to, 6544 format=fmt, 6545 safe=safe, 6546 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6547 default=default, 6548 ) 6549 6550 def _parse_string_agg(self) -> exp.GroupConcat: 6551 if self._match(TokenType.DISTINCT): 6552 args: t.List[t.Optional[exp.Expression]] = [ 6553 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6554 ] 6555 if self._match(TokenType.COMMA): 6556 args.extend(self._parse_csv(self._parse_assignment)) 6557 else: 6558 args = self._parse_csv(self._parse_assignment) # type: ignore 6559 6560 if self._match_text_seq("ON", "OVERFLOW"): 6561 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6562 if self._match_text_seq("ERROR"): 6563 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6564 else: 6565 self._match_text_seq("TRUNCATE") 6566 on_overflow = self.expression( 6567 exp.OverflowTruncateBehavior, 6568 this=self._parse_string(), 6569 with_count=( 6570 self._match_text_seq("WITH", "COUNT") 6571 or not self._match_text_seq("WITHOUT", "COUNT") 6572 ), 6573 ) 6574 else: 6575 on_overflow = None 6576 6577 index = self._index 6578 if not self._match(TokenType.R_PAREN) and args: 6579 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6580 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6581 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6582 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6583 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6584 6585 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6586 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6587 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
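        # Added note (hedged, not part of the original source; x and y are placeholder column
        # names): both Postgres' STRING_AGG(x, ',' ORDER BY y) and Trino's
        # LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) end up as a GroupConcat whose `this` carries
        # the ORDER BY, which lets generators emit forms such as MySQL's
        # GROUP_CONCAT(x ORDER BY y SEPARATOR ',').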
6588 if not self._match_text_seq("WITHIN", "GROUP"): 6589 self._retreat(index) 6590 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6591 6592 # The corresponding match_r_paren will be called in parse_function (caller) 6593 self._match_l_paren() 6594 6595 return self.expression( 6596 exp.GroupConcat, 6597 this=self._parse_order(this=seq_get(args, 0)), 6598 separator=seq_get(args, 1), 6599 on_overflow=on_overflow, 6600 ) 6601 6602 def _parse_convert( 6603 self, strict: bool, safe: t.Optional[bool] = None 6604 ) -> t.Optional[exp.Expression]: 6605 this = self._parse_bitwise() 6606 6607 if self._match(TokenType.USING): 6608 to: t.Optional[exp.Expression] = self.expression( 6609 exp.CharacterSet, this=self._parse_var() 6610 ) 6611 elif self._match(TokenType.COMMA): 6612 to = self._parse_types() 6613 else: 6614 to = None 6615 6616 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6617 6618 def _parse_xml_table(self) -> exp.XMLTable: 6619 namespaces = None 6620 passing = None 6621 columns = None 6622 6623 if self._match_text_seq("XMLNAMESPACES", "("): 6624 namespaces = self._parse_xml_namespace() 6625 self._match_text_seq(")", ",") 6626 6627 this = self._parse_string() 6628 6629 if self._match_text_seq("PASSING"): 6630 # The BY VALUE keywords are optional and are provided for semantic clarity 6631 self._match_text_seq("BY", "VALUE") 6632 passing = self._parse_csv(self._parse_column) 6633 6634 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6635 6636 if self._match_text_seq("COLUMNS"): 6637 columns = self._parse_csv(self._parse_field_def) 6638 6639 return self.expression( 6640 exp.XMLTable, 6641 this=this, 6642 namespaces=namespaces, 6643 passing=passing, 6644 columns=columns, 6645 by_ref=by_ref, 6646 ) 6647 6648 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6649 namespaces = [] 6650 6651 while True: 6652 if self._match(TokenType.DEFAULT): 6653 uri = self._parse_string() 6654 else: 6655 uri = self._parse_alias(self._parse_string()) 6656 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6657 if not self._match(TokenType.COMMA): 6658 break 6659 6660 return namespaces 6661 6662 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6663 args = self._parse_csv(self._parse_assignment) 6664 6665 if len(args) < 3: 6666 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6667 6668 return self.expression(exp.DecodeCase, expressions=args) 6669 6670 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6671 self._match_text_seq("KEY") 6672 key = self._parse_column() 6673 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6674 self._match_text_seq("VALUE") 6675 value = self._parse_bitwise() 6676 6677 if not key and not value: 6678 return None 6679 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6680 6681 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6682 if not this or not self._match_text_seq("FORMAT", "JSON"): 6683 return this 6684 6685 return self.expression(exp.FormatJson, this=this) 6686 6687 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6688 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6689 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6690 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6691 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6692 else: 6693 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6694 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6695 6696 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6697 6698 if not empty and not error and not null: 6699 return None 6700 6701 return self.expression( 6702 exp.OnCondition, 6703 empty=empty, 6704 error=error, 6705 null=null, 6706 ) 6707 6708 def _parse_on_handling( 6709 self, on: str, *values: str 6710 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6711 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6712 for value in values: 6713 if self._match_text_seq(value, "ON", on): 6714 return f"{value} ON {on}" 6715 6716 index = self._index 6717 if self._match(TokenType.DEFAULT): 6718 default_value = self._parse_bitwise() 6719 if self._match_text_seq("ON", on): 6720 return default_value 6721 6722 self._retreat(index) 6723 6724 return None 6725 6726 @t.overload 6727 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6728 6729 @t.overload 6730 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6731 6732 def _parse_json_object(self, agg=False): 6733 star = self._parse_star() 6734 expressions = ( 6735 [star] 6736 if star 6737 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6738 ) 6739 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6740 6741 unique_keys = None 6742 if self._match_text_seq("WITH", "UNIQUE"): 6743 unique_keys = True 6744 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6745 unique_keys = False 6746 6747 self._match_text_seq("KEYS") 6748 6749 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6750 self._parse_type() 6751 ) 6752 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6753 6754 return self.expression( 6755 exp.JSONObjectAgg if agg else exp.JSONObject, 6756 expressions=expressions, 6757 null_handling=null_handling, 6758 unique_keys=unique_keys, 6759 return_type=return_type, 6760 encoding=encoding, 6761 ) 6762 6763 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6764 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6765 if not self._match_text_seq("NESTED"): 6766 this = self._parse_id_var() 6767 kind = self._parse_types(allow_identifiers=False) 6768 nested = None 6769 else: 6770 this = None 6771 kind = None 6772 nested = True 6773 6774 path = self._match_text_seq("PATH") and self._parse_string() 6775 nested_schema = nested and self._parse_json_schema() 6776 6777 return self.expression( 6778 exp.JSONColumnDef, 6779 this=this, 6780 kind=kind, 6781 path=path, 6782 nested_schema=nested_schema, 6783 ) 6784 6785 def _parse_json_schema(self) -> exp.JSONSchema: 6786 self._match_text_seq("COLUMNS") 6787 return self.expression( 6788 exp.JSONSchema, 6789 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6790 ) 6791 6792 def _parse_json_table(self) -> exp.JSONTable: 6793 this = self._parse_format_json(self._parse_bitwise()) 6794 path = self._match(TokenType.COMMA) and self._parse_string() 6795 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6796 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6797 schema = 
self._parse_json_schema() 6798 6799 return exp.JSONTable( 6800 this=this, 6801 schema=schema, 6802 path=path, 6803 error_handling=error_handling, 6804 empty_handling=empty_handling, 6805 ) 6806 6807 def _parse_match_against(self) -> exp.MatchAgainst: 6808 expressions = self._parse_csv(self._parse_column) 6809 6810 self._match_text_seq(")", "AGAINST", "(") 6811 6812 this = self._parse_string() 6813 6814 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6815 modifier = "IN NATURAL LANGUAGE MODE" 6816 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6817 modifier = f"{modifier} WITH QUERY EXPANSION" 6818 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6819 modifier = "IN BOOLEAN MODE" 6820 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6821 modifier = "WITH QUERY EXPANSION" 6822 else: 6823 modifier = None 6824 6825 return self.expression( 6826 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6827 ) 6828 6829 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6830 def _parse_open_json(self) -> exp.OpenJSON: 6831 this = self._parse_bitwise() 6832 path = self._match(TokenType.COMMA) and self._parse_string() 6833 6834 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6835 this = self._parse_field(any_token=True) 6836 kind = self._parse_types() 6837 path = self._parse_string() 6838 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6839 6840 return self.expression( 6841 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6842 ) 6843 6844 expressions = None 6845 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6846 self._match_l_paren() 6847 expressions = self._parse_csv(_parse_open_json_column_def) 6848 6849 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6850 6851 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6852 args = self._parse_csv(self._parse_bitwise) 6853 6854 if self._match(TokenType.IN): 6855 return self.expression( 6856 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6857 ) 6858 6859 if haystack_first: 6860 haystack = seq_get(args, 0) 6861 needle = seq_get(args, 1) 6862 else: 6863 haystack = seq_get(args, 1) 6864 needle = seq_get(args, 0) 6865 6866 return self.expression( 6867 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6868 ) 6869 6870 def _parse_predict(self) -> exp.Predict: 6871 self._match_text_seq("MODEL") 6872 this = self._parse_table() 6873 6874 self._match(TokenType.COMMA) 6875 self._match_text_seq("TABLE") 6876 6877 return self.expression( 6878 exp.Predict, 6879 this=this, 6880 expression=self._parse_table(), 6881 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6882 ) 6883 6884 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6885 args = self._parse_csv(self._parse_table) 6886 return exp.JoinHint(this=func_name.upper(), expressions=args) 6887 6888 def _parse_substring(self) -> exp.Substring: 6889 # Postgres supports the form: substring(string [from int] [for int]) 6890 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6891 6892 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6893 6894 if self._match(TokenType.FROM): 6895 args.append(self._parse_bitwise()) 6896 if self._match(TokenType.FOR): 6897 if len(args) == 1: 6898 args.append(exp.Literal.number(1)) 6899 args.append(self._parse_bitwise()) 6900 6901 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 6902 6903 def _parse_trim(self) -> exp.Trim: 6904 # https://www.w3resource.com/sql/character-functions/trim.php 6905 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6906 6907 position = None 6908 collation = None 6909 expression = None 6910 6911 if self._match_texts(self.TRIM_TYPES): 6912 position = self._prev.text.upper() 6913 6914 this = self._parse_bitwise() 6915 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6916 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6917 expression = self._parse_bitwise() 6918 6919 if invert_order: 6920 this, expression = expression, this 6921 6922 if self._match(TokenType.COLLATE): 6923 collation = self._parse_bitwise() 6924 6925 return self.expression( 6926 exp.Trim, this=this, position=position, expression=expression, collation=collation 6927 ) 6928 6929 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6930 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6931 6932 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6933 return self._parse_window(self._parse_id_var(), alias=True) 6934 6935 def _parse_respect_or_ignore_nulls( 6936 self, this: t.Optional[exp.Expression] 6937 ) -> t.Optional[exp.Expression]: 6938 if self._match_text_seq("IGNORE", "NULLS"): 6939 return self.expression(exp.IgnoreNulls, this=this) 6940 if self._match_text_seq("RESPECT", "NULLS"): 6941 return self.expression(exp.RespectNulls, this=this) 6942 return this 6943 6944 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6945 if self._match(TokenType.HAVING): 6946 self._match_texts(("MAX", "MIN")) 6947 max = self._prev.text.upper() != "MIN" 6948 return self.expression( 6949 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6950 ) 6951 6952 return this 6953 6954 def _parse_window( 6955 self, this: t.Optional[exp.Expression], alias: bool = False 6956 ) -> t.Optional[exp.Expression]: 6957 func = this 6958 comments = func.comments if isinstance(func, exp.Expression) else None 6959 6960 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6961 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6962 if self._match_text_seq("WITHIN", "GROUP"): 6963 order = self._parse_wrapped(self._parse_order) 6964 this = self.expression(exp.WithinGroup, this=this, expression=order) 6965 6966 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6967 self._match(TokenType.WHERE) 6968 this = self.expression( 6969 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6970 ) 6971 self._match_r_paren() 6972 6973 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6974 # Some dialects choose to implement and some do not. 6975 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6976 6977 # There is some code above in _parse_lambda that handles 6978 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6979 6980 # The below changes handle 6981 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6982 6983 # Oracle allows both formats 6984 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6985 # and Snowflake chose to do the same for familiarity 6986 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6987 if isinstance(this, exp.AggFunc): 6988 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6989 6990 if ignore_respect and ignore_respect is not this: 6991 ignore_respect.replace(ignore_respect.this) 6992 this = self.expression(ignore_respect.__class__, this=this) 6993 6994 this = self._parse_respect_or_ignore_nulls(this) 6995 6996 # bigquery select from window x AS (partition by ...) 6997 if alias: 6998 over = None 6999 self._match(TokenType.ALIAS) 7000 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7001 return this 7002 else: 7003 over = self._prev.text.upper() 7004 7005 if comments and isinstance(func, exp.Expression): 7006 func.pop_comments() 7007 7008 if not self._match(TokenType.L_PAREN): 7009 return self.expression( 7010 exp.Window, 7011 comments=comments, 7012 this=this, 7013 alias=self._parse_id_var(False), 7014 over=over, 7015 ) 7016 7017 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7018 7019 first = self._match(TokenType.FIRST) 7020 if self._match_text_seq("LAST"): 7021 first = False 7022 7023 partition, order = self._parse_partition_and_order() 7024 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7025 7026 if kind: 7027 self._match(TokenType.BETWEEN) 7028 start = self._parse_window_spec() 7029 self._match(TokenType.AND) 7030 end = self._parse_window_spec() 7031 exclude = ( 7032 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7033 if self._match_text_seq("EXCLUDE") 7034 else None 7035 ) 7036 7037 spec = self.expression( 7038 exp.WindowSpec, 7039 kind=kind, 7040 start=start["value"], 7041 start_side=start["side"], 7042 end=end["value"], 7043 end_side=end["side"], 7044 exclude=exclude, 7045 ) 7046 else: 7047 spec = None 7048 7049 self._match_r_paren() 7050 7051 window = self.expression( 7052 exp.Window, 7053 comments=comments, 7054 this=this, 7055 partition_by=partition, 7056 order=order, 7057 spec=spec, 7058 alias=window_alias, 7059 over=over, 7060 first=first, 7061 ) 7062 7063 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7064 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7065 return self._parse_window(window, alias=alias) 7066 7067 return window 7068 7069 def _parse_partition_and_order( 7070 self, 7071 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7072 return self._parse_partition_by(), self._parse_order() 7073 7074 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7075 self._match(TokenType.BETWEEN) 7076 7077 return { 7078 "value": ( 7079 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7080 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7081 or self._parse_bitwise() 7082 ), 7083 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7084 } 7085 7086 def _parse_alias( 7087 self, this: t.Optional[exp.Expression], explicit: bool = False 7088 ) -> t.Optional[exp.Expression]: 7089 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7090 # so this section tries to parse the clause version and if it fails, it treats the token 7091 # as an identifier (alias) 7092 if self._can_parse_limit_or_offset(): 7093 return this 7094 7095 any_token = self._match(TokenType.ALIAS) 7096 comments = self._prev_comments or [] 7097 7098 if explicit and not any_token: 7099 return this 7100 7101 if self._match(TokenType.L_PAREN): 7102 aliases = self.expression( 7103 exp.Aliases, 7104 comments=comments, 7105 this=this, 7106 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7107 ) 7108 self._match_r_paren(aliases) 7109 return aliases 7110 7111 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7112 self.STRING_ALIASES and self._parse_string_as_identifier() 7113 ) 7114 7115 if alias: 7116 comments.extend(alias.pop_comments()) 7117 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7118 column = this.this 7119 7120 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7121 if not this.comments and column and column.comments: 7122 this.comments = column.pop_comments() 7123 7124 return this 7125 7126 def _parse_id_var( 7127 self, 7128 any_token: bool = True, 7129 tokens: t.Optional[t.Collection[TokenType]] = None, 7130 ) -> t.Optional[exp.Expression]: 7131 expression = self._parse_identifier() 7132 if not expression and ( 7133 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7134 ): 7135 quoted = self._prev.token_type == TokenType.STRING 7136 expression = self._identifier_expression(quoted=quoted) 7137 7138 return expression 7139 7140 def _parse_string(self) -> t.Optional[exp.Expression]: 7141 if self._match_set(self.STRING_PARSERS): 7142 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7143 return self._parse_placeholder() 7144 7145 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7146 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7147 if output: 7148 output.update_positions(self._prev) 7149 return output 7150 7151 def _parse_number(self) -> t.Optional[exp.Expression]: 7152 if self._match_set(self.NUMERIC_PARSERS): 7153 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7154 return self._parse_placeholder() 7155 7156 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7157 if self._match(TokenType.IDENTIFIER): 7158 return self._identifier_expression(quoted=True) 7159 return self._parse_placeholder() 7160 7161 def _parse_var( 7162 self, 7163 any_token: bool = False, 7164 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7165 upper: bool = False, 7166 ) -> t.Optional[exp.Expression]: 7167 if ( 7168 (any_token and self._advance_any()) 7169 or self._match(TokenType.VAR) 7170 or (self._match_set(tokens) if tokens else False) 7171 ): 7172 return self.expression( 7173 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7174 ) 7175 return self._parse_placeholder() 7176 7177 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7178 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7179 self._advance() 7180 return self._prev 7181 return None 7182 7183 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7184 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7185 7186 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7187 return self._parse_primary() or self._parse_var(any_token=True) 7188 7189 def _parse_null(self) -> t.Optional[exp.Expression]: 7190 if self._match_set(self.NULL_TOKENS): 7191 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7192 return self._parse_placeholder() 7193 7194 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7195 if self._match(TokenType.TRUE): 7196 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7197 if self._match(TokenType.FALSE): 7198 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7199 return self._parse_placeholder() 7200 7201 def _parse_star(self) -> t.Optional[exp.Expression]: 7202 if self._match(TokenType.STAR): 7203 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7204 return self._parse_placeholder() 7205 7206 def _parse_parameter(self) -> exp.Parameter: 7207 this = self._parse_identifier() or self._parse_primary_or_var() 7208 return self.expression(exp.Parameter, this=this) 7209 7210 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7211 if self._match_set(self.PLACEHOLDER_PARSERS): 7212 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7213 if placeholder: 7214 return placeholder 7215 self._advance(-1) 7216 return None 7217 7218 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7219 if not self._match_texts(keywords): 7220 return None 7221 if self._match(TokenType.L_PAREN, advance=False): 7222 return self._parse_wrapped_csv(self._parse_expression) 7223 7224 expression = self._parse_expression() 7225 return [expression] if expression else None 7226 7227 def _parse_csv( 7228 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7229 ) -> t.List[exp.Expression]: 7230 parse_result = parse_method() 7231 items = [parse_result] if parse_result is not None else [] 7232 7233 while self._match(sep): 7234 self._add_comments(parse_result) 7235 parse_result = parse_method() 7236 if parse_result is not None: 7237 items.append(parse_result) 7238 7239 return items 7240 7241 def _parse_tokens( 7242 self, parse_method: t.Callable, expressions: t.Dict 7243 ) -> t.Optional[exp.Expression]: 7244 this = parse_method() 7245 7246 while self._match_set(expressions): 7247 this = self.expression( 7248 expressions[self._prev.token_type], 7249 this=this, 7250 comments=self._prev_comments, 7251 expression=parse_method(), 7252 ) 7253 7254 return this 7255 7256 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7257 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7258 7259 def _parse_wrapped_csv( 7260 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7261 ) -> t.List[exp.Expression]: 7262 return self._parse_wrapped( 7263 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7264 ) 7265 7266 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7267 wrapped = self._match(TokenType.L_PAREN) 7268 if not wrapped and not optional: 7269 self.raise_error("Expecting (") 7270 parse_result = parse_method() 7271 if wrapped: 7272 self._match_r_paren() 7273 return parse_result 7274 7275 def _parse_expressions(self) -> t.List[exp.Expression]: 7276 return self._parse_csv(self._parse_expression) 7277 7278 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7279 return self._parse_select() or self._parse_set_operations( 7280 self._parse_alias(self._parse_assignment(), explicit=True) 7281 if alias 7282 else self._parse_assignment() 7283 ) 7284 7285 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7286 return self._parse_query_modifiers( 7287 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7288 ) 7289 7290 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7291 this = None 7292 if self._match_texts(self.TRANSACTION_KIND): 7293 this = self._prev.text 7294 7295 self._match_texts(("TRANSACTION", "WORK")) 7296 7297 modes = [] 7298 while True: 7299 mode = [] 7300 while self._match(TokenType.VAR): 7301 mode.append(self._prev.text) 7302 7303 if mode: 7304 modes.append(" ".join(mode)) 7305 if not self._match(TokenType.COMMA): 7306 break 7307 7308 return self.expression(exp.Transaction, this=this, modes=modes) 7309 7310 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7311 chain = None 7312 savepoint = None 7313 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7314 7315 self._match_texts(("TRANSACTION", "WORK")) 7316 7317 if self._match_text_seq("TO"): 7318 self._match_text_seq("SAVEPOINT") 7319 savepoint = self._parse_id_var() 7320 7321 if self._match(TokenType.AND): 7322 chain = not self._match_text_seq("NO") 7323 self._match_text_seq("CHAIN") 7324 7325 if is_rollback: 7326 return self.expression(exp.Rollback, savepoint=savepoint) 7327 7328 return self.expression(exp.Commit, chain=chain) 7329 7330 def _parse_refresh(self) -> exp.Refresh: 7331 self._match(TokenType.TABLE) 7332 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7333 7334 def _parse_column_def_with_exists(self): 7335 start = self._index 7336 self._match(TokenType.COLUMN) 7337 7338 exists_column = self._parse_exists(not_=True) 7339 expression = self._parse_field_def() 7340 7341 if not isinstance(expression, exp.ColumnDef): 7342 self._retreat(start) 7343 return None 7344 7345 expression.set("exists", exists_column) 7346 7347 return expression 7348 7349 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7350 if not self._prev.text.upper() == "ADD": 7351 return None 7352 7353 expression = self._parse_column_def_with_exists() 7354 if not expression: 7355 return None 7356 7357 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7358 if self._match_texts(("FIRST", "AFTER")): 7359 position = self._prev.text 7360 column_position = self.expression( 7361 exp.ColumnPosition, this=self._parse_column(), position=position 7362 ) 7363 expression.set("position", column_position) 7364 7365 return expression 7366 7367 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7368 drop = 
self._match(TokenType.DROP) and self._parse_drop() 7369 if drop and not isinstance(drop, exp.Command): 7370 drop.set("kind", drop.args.get("kind", "COLUMN")) 7371 return drop 7372 7373 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7374 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7375 return self.expression( 7376 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7377 ) 7378 7379 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7380 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7381 self._match_text_seq("ADD") 7382 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7383 return self.expression( 7384 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7385 ) 7386 7387 column_def = self._parse_add_column() 7388 if isinstance(column_def, exp.ColumnDef): 7389 return column_def 7390 7391 exists = self._parse_exists(not_=True) 7392 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7393 return self.expression( 7394 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7395 ) 7396 7397 return None 7398 7399 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7400 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7401 or self._match_text_seq("COLUMNS") 7402 ): 7403 schema = self._parse_schema() 7404 7405 return ( 7406 ensure_list(schema) 7407 if schema 7408 else self._parse_csv(self._parse_column_def_with_exists) 7409 ) 7410 7411 return self._parse_csv(_parse_add_alteration) 7412 7413 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7414 if self._match_texts(self.ALTER_ALTER_PARSERS): 7415 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7416 7417 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7418 # keyword after ALTER we default to parsing this statement 7419 self._match(TokenType.COLUMN) 7420 column = self._parse_field(any_token=True) 7421 7422 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7423 return self.expression(exp.AlterColumn, this=column, drop=True) 7424 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7425 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7426 if self._match(TokenType.COMMENT): 7427 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7428 if self._match_text_seq("DROP", "NOT", "NULL"): 7429 return self.expression( 7430 exp.AlterColumn, 7431 this=column, 7432 drop=True, 7433 allow_null=True, 7434 ) 7435 if self._match_text_seq("SET", "NOT", "NULL"): 7436 return self.expression( 7437 exp.AlterColumn, 7438 this=column, 7439 allow_null=False, 7440 ) 7441 7442 if self._match_text_seq("SET", "VISIBLE"): 7443 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7444 if self._match_text_seq("SET", "INVISIBLE"): 7445 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7446 7447 self._match_text_seq("SET", "DATA") 7448 self._match_text_seq("TYPE") 7449 return self.expression( 7450 exp.AlterColumn, 7451 this=column, 7452 dtype=self._parse_types(), 7453 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7454 using=self._match(TokenType.USING) and self._parse_assignment(), 7455 ) 7456 7457 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7458 if self._match_texts(("ALL", "EVEN", "AUTO")): 7459 return self.expression(exp.AlterDistStyle, 
this=exp.var(self._prev.text.upper())) 7460 7461 self._match_text_seq("KEY", "DISTKEY") 7462 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7463 7464 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7465 if compound: 7466 self._match_text_seq("SORTKEY") 7467 7468 if self._match(TokenType.L_PAREN, advance=False): 7469 return self.expression( 7470 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7471 ) 7472 7473 self._match_texts(("AUTO", "NONE")) 7474 return self.expression( 7475 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7476 ) 7477 7478 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7479 index = self._index - 1 7480 7481 partition_exists = self._parse_exists() 7482 if self._match(TokenType.PARTITION, advance=False): 7483 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7484 7485 self._retreat(index) 7486 return self._parse_csv(self._parse_drop_column) 7487 7488 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7489 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7490 exists = self._parse_exists() 7491 old_column = self._parse_column() 7492 to = self._match_text_seq("TO") 7493 new_column = self._parse_column() 7494 7495 if old_column is None or to is None or new_column is None: 7496 return None 7497 7498 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7499 7500 self._match_text_seq("TO") 7501 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7502 7503 def _parse_alter_table_set(self) -> exp.AlterSet: 7504 alter_set = self.expression(exp.AlterSet) 7505 7506 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7507 "TABLE", "PROPERTIES" 7508 ): 7509 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7510 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7511 alter_set.set("expressions", [self._parse_assignment()]) 7512 elif self._match_texts(("LOGGED", "UNLOGGED")): 7513 alter_set.set("option", exp.var(self._prev.text.upper())) 7514 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7515 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7516 elif self._match_text_seq("LOCATION"): 7517 alter_set.set("location", self._parse_field()) 7518 elif self._match_text_seq("ACCESS", "METHOD"): 7519 alter_set.set("access_method", self._parse_field()) 7520 elif self._match_text_seq("TABLESPACE"): 7521 alter_set.set("tablespace", self._parse_field()) 7522 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7523 alter_set.set("file_format", [self._parse_field()]) 7524 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7525 alter_set.set("file_format", self._parse_wrapped_options()) 7526 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7527 alter_set.set("copy_options", self._parse_wrapped_options()) 7528 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7529 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7530 else: 7531 if self._match_text_seq("SERDE"): 7532 alter_set.set("serde", self._parse_field()) 7533 7534 properties = self._parse_wrapped(self._parse_properties, optional=True) 7535 alter_set.set("expressions", [properties]) 7536 7537 return alter_set 7538 7539 def _parse_alter(self) -> exp.Alter | exp.Command: 7540 start = self._prev 7541 
7542 alter_token = self._match_set(self.ALTERABLES) and self._prev 7543 if not alter_token: 7544 return self._parse_as_command(start) 7545 7546 exists = self._parse_exists() 7547 only = self._match_text_seq("ONLY") 7548 this = self._parse_table(schema=True) 7549 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7550 7551 if self._next: 7552 self._advance() 7553 7554 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7555 if parser: 7556 actions = ensure_list(parser(self)) 7557 not_valid = self._match_text_seq("NOT", "VALID") 7558 options = self._parse_csv(self._parse_property) 7559 7560 if not self._curr and actions: 7561 return self.expression( 7562 exp.Alter, 7563 this=this, 7564 kind=alter_token.text.upper(), 7565 exists=exists, 7566 actions=actions, 7567 only=only, 7568 options=options, 7569 cluster=cluster, 7570 not_valid=not_valid, 7571 ) 7572 7573 return self._parse_as_command(start) 7574 7575 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7576 start = self._prev 7577 # https://duckdb.org/docs/sql/statements/analyze 7578 if not self._curr: 7579 return self.expression(exp.Analyze) 7580 7581 options = [] 7582 while self._match_texts(self.ANALYZE_STYLES): 7583 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7584 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7585 else: 7586 options.append(self._prev.text.upper()) 7587 7588 this: t.Optional[exp.Expression] = None 7589 inner_expression: t.Optional[exp.Expression] = None 7590 7591 kind = self._curr and self._curr.text.upper() 7592 7593 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7594 this = self._parse_table_parts() 7595 elif self._match_text_seq("TABLES"): 7596 if self._match_set((TokenType.FROM, TokenType.IN)): 7597 kind = f"{kind} {self._prev.text.upper()}" 7598 this = self._parse_table(schema=True, is_db_reference=True) 7599 elif self._match_text_seq("DATABASE"): 7600 this = self._parse_table(schema=True, is_db_reference=True) 7601 elif self._match_text_seq("CLUSTER"): 7602 this = self._parse_table() 7603 # Try matching inner expr keywords before fallback to parse table. 
7604 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7605 kind = None 7606 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7607 else: 7608 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7609 kind = None 7610 this = self._parse_table_parts() 7611 7612 partition = self._try_parse(self._parse_partition) 7613 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7614 return self._parse_as_command(start) 7615 7616 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7617 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7618 "WITH", "ASYNC", "MODE" 7619 ): 7620 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7621 else: 7622 mode = None 7623 7624 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7625 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7626 7627 properties = self._parse_properties() 7628 return self.expression( 7629 exp.Analyze, 7630 kind=kind, 7631 this=this, 7632 mode=mode, 7633 partition=partition, 7634 properties=properties, 7635 expression=inner_expression, 7636 options=options, 7637 ) 7638 7639 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7640 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7641 this = None 7642 kind = self._prev.text.upper() 7643 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7644 expressions = [] 7645 7646 if not self._match_text_seq("STATISTICS"): 7647 self.raise_error("Expecting token STATISTICS") 7648 7649 if self._match_text_seq("NOSCAN"): 7650 this = "NOSCAN" 7651 elif self._match(TokenType.FOR): 7652 if self._match_text_seq("ALL", "COLUMNS"): 7653 this = "FOR ALL COLUMNS" 7654 if self._match_texts("COLUMNS"): 7655 this = "FOR COLUMNS" 7656 expressions = self._parse_csv(self._parse_column_reference) 7657 elif self._match_text_seq("SAMPLE"): 7658 sample = self._parse_number() 7659 expressions = [ 7660 self.expression( 7661 exp.AnalyzeSample, 7662 sample=sample, 7663 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7664 ) 7665 ] 7666 7667 return self.expression( 7668 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7669 ) 7670 7671 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7672 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7673 kind = None 7674 this = None 7675 expression: t.Optional[exp.Expression] = None 7676 if self._match_text_seq("REF", "UPDATE"): 7677 kind = "REF" 7678 this = "UPDATE" 7679 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7680 this = "UPDATE SET DANGLING TO NULL" 7681 elif self._match_text_seq("STRUCTURE"): 7682 kind = "STRUCTURE" 7683 if self._match_text_seq("CASCADE", "FAST"): 7684 this = "CASCADE FAST" 7685 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7686 ("ONLINE", "OFFLINE") 7687 ): 7688 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7689 expression = self._parse_into() 7690 7691 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7692 7693 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7694 this = self._prev.text.upper() 7695 if self._match_text_seq("COLUMNS"): 7696 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7697 return None 7698 7699 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7700 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7701 if self._match_text_seq("STATISTICS"): 7702 return self.expression(exp.AnalyzeDelete, kind=kind) 7703 return None 7704 7705 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7706 if self._match_text_seq("CHAINED", "ROWS"): 7707 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7708 return None 7709 7710 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7711 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7712 this = self._prev.text.upper() 7713 expression: t.Optional[exp.Expression] = None 7714 expressions = [] 7715 update_options = None 7716 7717 if self._match_text_seq("HISTOGRAM", "ON"): 7718 expressions = self._parse_csv(self._parse_column_reference) 7719 with_expressions = [] 7720 while self._match(TokenType.WITH): 7721 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7722 if self._match_texts(("SYNC", "ASYNC")): 7723 if self._match_text_seq("MODE", advance=False): 7724 with_expressions.append(f"{self._prev.text.upper()} MODE") 7725 self._advance() 7726 else: 7727 buckets = self._parse_number() 7728 if self._match_text_seq("BUCKETS"): 7729 with_expressions.append(f"{buckets} BUCKETS") 7730 if with_expressions: 7731 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7732 7733 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7734 TokenType.UPDATE, advance=False 7735 ): 7736 update_options = self._prev.text.upper() 7737 self._advance() 7738 elif self._match_text_seq("USING", "DATA"): 7739 expression = self.expression(exp.UsingData, this=self._parse_string()) 7740 7741 return self.expression( 7742 exp.AnalyzeHistogram, 7743 this=this, 7744 expressions=expressions, 7745 expression=expression, 7746 update_options=update_options, 7747 ) 7748 7749 def _parse_merge(self) -> exp.Merge: 7750 self._match(TokenType.INTO) 7751 target = self._parse_table() 7752 7753 if target and self._match(TokenType.ALIAS, advance=False): 7754 target.set("alias", self._parse_table_alias()) 7755 7756 self._match(TokenType.USING) 7757 using = self._parse_table() 7758 7759 self._match(TokenType.ON) 7760 on = self._parse_assignment() 7761 7762 return self.expression( 7763 exp.Merge, 7764 this=target, 7765 using=using, 7766 on=on, 7767 whens=self._parse_when_matched(), 7768 returning=self._parse_returning(), 7769 ) 7770 7771 def _parse_when_matched(self) -> exp.Whens: 7772 whens = [] 7773 7774 while self._match(TokenType.WHEN): 7775 matched = not self._match(TokenType.NOT) 7776 self._match_text_seq("MATCHED") 7777 source = ( 7778 False 7779 if self._match_text_seq("BY", "TARGET") 7780 else self._match_text_seq("BY", "SOURCE") 7781 ) 7782 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7783 7784 self._match(TokenType.THEN) 7785 7786 if self._match(TokenType.INSERT): 7787 this = self._parse_star() 7788 if this: 7789 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7790 else: 7791 then = self.expression( 7792 exp.Insert, 7793 this=exp.var("ROW") 7794 if self._match_text_seq("ROW") 7795 else self._parse_value(values=False), 7796 expression=self._match_text_seq("VALUES") and self._parse_value(), 7797 ) 7798 elif self._match(TokenType.UPDATE): 7799 expressions = self._parse_star() 7800 if expressions: 7801 then = self.expression(exp.Update, expressions=expressions) 7802 else: 7803 then = self.expression( 7804 exp.Update, 7805 
expressions=self._match(TokenType.SET) 7806 and self._parse_csv(self._parse_equality), 7807 ) 7808 elif self._match(TokenType.DELETE): 7809 then = self.expression(exp.Var, this=self._prev.text) 7810 else: 7811 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7812 7813 whens.append( 7814 self.expression( 7815 exp.When, 7816 matched=matched, 7817 source=source, 7818 condition=condition, 7819 then=then, 7820 ) 7821 ) 7822 return self.expression(exp.Whens, expressions=whens) 7823 7824 def _parse_show(self) -> t.Optional[exp.Expression]: 7825 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7826 if parser: 7827 return parser(self) 7828 return self._parse_as_command(self._prev) 7829 7830 def _parse_set_item_assignment( 7831 self, kind: t.Optional[str] = None 7832 ) -> t.Optional[exp.Expression]: 7833 index = self._index 7834 7835 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7836 return self._parse_set_transaction(global_=kind == "GLOBAL") 7837 7838 left = self._parse_primary() or self._parse_column() 7839 assignment_delimiter = self._match_texts(("=", "TO")) 7840 7841 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7842 self._retreat(index) 7843 return None 7844 7845 right = self._parse_statement() or self._parse_id_var() 7846 if isinstance(right, (exp.Column, exp.Identifier)): 7847 right = exp.var(right.name) 7848 7849 this = self.expression(exp.EQ, this=left, expression=right) 7850 return self.expression(exp.SetItem, this=this, kind=kind) 7851 7852 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7853 self._match_text_seq("TRANSACTION") 7854 characteristics = self._parse_csv( 7855 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7856 ) 7857 return self.expression( 7858 exp.SetItem, 7859 expressions=characteristics, 7860 kind="TRANSACTION", 7861 **{"global": global_}, # type: ignore 7862 ) 7863 7864 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7865 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7866 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7867 7868 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7869 index = self._index 7870 set_ = self.expression( 7871 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7872 ) 7873 7874 if self._curr: 7875 self._retreat(index) 7876 return self._parse_as_command(self._prev) 7877 7878 return set_ 7879 7880 def _parse_var_from_options( 7881 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7882 ) -> t.Optional[exp.Var]: 7883 start = self._curr 7884 if not start: 7885 return None 7886 7887 option = start.text.upper() 7888 continuations = options.get(option) 7889 7890 index = self._index 7891 self._advance() 7892 for keywords in continuations or []: 7893 if isinstance(keywords, str): 7894 keywords = (keywords,) 7895 7896 if self._match_text_seq(*keywords): 7897 option = f"{option} {' '.join(keywords)}" 7898 break 7899 else: 7900 if continuations or continuations is None: 7901 if raise_unmatched: 7902 self.raise_error(f"Unknown option {option}") 7903 7904 self._retreat(index) 7905 return None 7906 7907 return exp.var(option) 7908 7909 def _parse_as_command(self, start: Token) -> exp.Command: 7910 while self._curr: 7911 self._advance() 7912 text = self._find_sql(start, self._prev) 7913 size = len(start.text) 7914 self._warn_unsupported() 7915 return exp.Command(this=text[:size], 
expression=text[size:]) 7916 7917 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7918 settings = [] 7919 7920 self._match_l_paren() 7921 kind = self._parse_id_var() 7922 7923 if self._match(TokenType.L_PAREN): 7924 while True: 7925 key = self._parse_id_var() 7926 value = self._parse_primary() 7927 if not key and value is None: 7928 break 7929 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7930 self._match(TokenType.R_PAREN) 7931 7932 self._match_r_paren() 7933 7934 return self.expression( 7935 exp.DictProperty, 7936 this=this, 7937 kind=kind.this if kind else None, 7938 settings=settings, 7939 ) 7940 7941 def _parse_dict_range(self, this: str) -> exp.DictRange: 7942 self._match_l_paren() 7943 has_min = self._match_text_seq("MIN") 7944 if has_min: 7945 min = self._parse_var() or self._parse_primary() 7946 self._match_text_seq("MAX") 7947 max = self._parse_var() or self._parse_primary() 7948 else: 7949 max = self._parse_var() or self._parse_primary() 7950 min = exp.Literal.number(0) 7951 self._match_r_paren() 7952 return self.expression(exp.DictRange, this=this, min=min, max=max) 7953 7954 def _parse_comprehension( 7955 self, this: t.Optional[exp.Expression] 7956 ) -> t.Optional[exp.Comprehension]: 7957 index = self._index 7958 expression = self._parse_column() 7959 if not self._match(TokenType.IN): 7960 self._retreat(index - 1) 7961 return None 7962 iterator = self._parse_column() 7963 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7964 return self.expression( 7965 exp.Comprehension, 7966 this=this, 7967 expression=expression, 7968 iterator=iterator, 7969 condition=condition, 7970 ) 7971 7972 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7973 if self._match(TokenType.HEREDOC_STRING): 7974 return self.expression(exp.Heredoc, this=self._prev.text) 7975 7976 if not self._match_text_seq("$"): 7977 return None 7978 7979 tags = ["$"] 7980 tag_text = None 7981 7982 if self._is_connected(): 7983 self._advance() 7984 tags.append(self._prev.text.upper()) 7985 else: 7986 self.raise_error("No closing $ found") 7987 7988 if tags[-1] != "$": 7989 if self._is_connected() and self._match_text_seq("$"): 7990 tag_text = tags[-1] 7991 tags.append("$") 7992 else: 7993 self.raise_error("No closing $ found") 7994 7995 heredoc_start = self._curr 7996 7997 while self._curr: 7998 if self._match_text_seq(*tags, advance=False): 7999 this = self._find_sql(heredoc_start, self._prev) 8000 self._advance(len(tags)) 8001 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8002 8003 self._advance() 8004 8005 self.raise_error(f"No closing {''.join(tags)} found") 8006 return None 8007 8008 def _find_parser( 8009 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8010 ) -> t.Optional[t.Callable]: 8011 if not self._curr: 8012 return None 8013 8014 index = self._index 8015 this = [] 8016 while True: 8017 # The current token might be multiple words 8018 curr = self._curr.text.upper() 8019 key = curr.split(" ") 8020 this.append(curr) 8021 8022 self._advance() 8023 result, trie = in_trie(trie, key) 8024 if result == TrieResult.FAILED: 8025 break 8026 8027 if result == TrieResult.EXISTS: 8028 subparser = parsers[" ".join(this)] 8029 return subparser 8030 8031 self._retreat(index) 8032 return None 8033 8034 def _match(self, token_type, advance=True, expression=None): 8035 if not self._curr: 8036 return None 8037 8038 if self._curr.token_type == token_type: 8039 if advance: 8040 self._advance() 8041 self._add_comments(expression) 8042 return 
True 8043 8044 return None 8045 8046 def _match_set(self, types, advance=True): 8047 if not self._curr: 8048 return None 8049 8050 if self._curr.token_type in types: 8051 if advance: 8052 self._advance() 8053 return True 8054 8055 return None 8056 8057 def _match_pair(self, token_type_a, token_type_b, advance=True): 8058 if not self._curr or not self._next: 8059 return None 8060 8061 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8062 if advance: 8063 self._advance(2) 8064 return True 8065 8066 return None 8067 8068 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8069 if not self._match(TokenType.L_PAREN, expression=expression): 8070 self.raise_error("Expecting (") 8071 8072 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8073 if not self._match(TokenType.R_PAREN, expression=expression): 8074 self.raise_error("Expecting )") 8075 8076 def _match_texts(self, texts, advance=True): 8077 if ( 8078 self._curr 8079 and self._curr.token_type != TokenType.STRING 8080 and self._curr.text.upper() in texts 8081 ): 8082 if advance: 8083 self._advance() 8084 return True 8085 return None 8086 8087 def _match_text_seq(self, *texts, advance=True): 8088 index = self._index 8089 for text in texts: 8090 if ( 8091 self._curr 8092 and self._curr.token_type != TokenType.STRING 8093 and self._curr.text.upper() == text 8094 ): 8095 self._advance() 8096 else: 8097 self._retreat(index) 8098 return None 8099 8100 if not advance: 8101 self._retreat(index) 8102 8103 return True 8104 8105 def _replace_lambda( 8106 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8107 ) -> t.Optional[exp.Expression]: 8108 if not node: 8109 return node 8110 8111 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8112 8113 for column in node.find_all(exp.Column): 8114 typ = lambda_types.get(column.parts[0].name) 8115 if typ is not None: 8116 dot_or_id = column.to_dot() if column.table else column.this 8117 8118 if typ: 8119 dot_or_id = self.expression( 8120 exp.Cast, 8121 this=dot_or_id, 8122 to=typ, 8123 ) 8124 8125 parent = column.parent 8126 8127 while isinstance(parent, exp.Dot): 8128 if not isinstance(parent.parent, exp.Dot): 8129 parent.replace(dot_or_id) 8130 break 8131 parent = parent.parent 8132 else: 8133 if column is node: 8134 node = dot_or_id 8135 else: 8136 column.replace(dot_or_id) 8137 return node 8138 8139 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8140 start = self._prev 8141 8142 # Not to be confused with TRUNCATE(number, decimals) function call 8143 if self._match(TokenType.L_PAREN): 8144 self._retreat(self._index - 2) 8145 return self._parse_function() 8146 8147 # Clickhouse supports TRUNCATE DATABASE as well 8148 is_database = self._match(TokenType.DATABASE) 8149 8150 self._match(TokenType.TABLE) 8151 8152 exists = self._parse_exists(not_=False) 8153 8154 expressions = self._parse_csv( 8155 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8156 ) 8157 8158 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8159 8160 if self._match_text_seq("RESTART", "IDENTITY"): 8161 identity = "RESTART" 8162 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8163 identity = "CONTINUE" 8164 else: 8165 identity = None 8166 8167 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8168 option = self._prev.text 8169 else: 8170 option = None 8171 8172 partition = self._parse_partition() 
8173 8174 # Fallback case 8175 if self._curr: 8176 return self._parse_as_command(start) 8177 8178 return self.expression( 8179 exp.TruncateTable, 8180 expressions=expressions, 8181 is_database=is_database, 8182 exists=exists, 8183 cluster=cluster, 8184 identity=identity, 8185 option=option, 8186 partition=partition, 8187 ) 8188 8189 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8190 this = self._parse_ordered(self._parse_opclass) 8191 8192 if not self._match(TokenType.WITH): 8193 return this 8194 8195 op = self._parse_var(any_token=True) 8196 8197 return self.expression(exp.WithOperator, this=this, op=op) 8198 8199 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8200 self._match(TokenType.EQ) 8201 self._match(TokenType.L_PAREN) 8202 8203 opts: t.List[t.Optional[exp.Expression]] = [] 8204 option: exp.Expression | None 8205 while self._curr and not self._match(TokenType.R_PAREN): 8206 if self._match_text_seq("FORMAT_NAME", "="): 8207 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8208 option = self._parse_format_name() 8209 else: 8210 option = self._parse_property() 8211 8212 if option is None: 8213 self.raise_error("Unable to parse option") 8214 break 8215 8216 opts.append(option) 8217 8218 return opts 8219 8220 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8221 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8222 8223 options = [] 8224 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8225 option = self._parse_var(any_token=True) 8226 prev = self._prev.text.upper() 8227 8228 # Different dialects might separate options and values by white space, "=" and "AS" 8229 self._match(TokenType.EQ) 8230 self._match(TokenType.ALIAS) 8231 8232 param = self.expression(exp.CopyParameter, this=option) 8233 8234 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8235 TokenType.L_PAREN, advance=False 8236 ): 8237 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8238 param.set("expressions", self._parse_wrapped_options()) 8239 elif prev == "FILE_FORMAT": 8240 # T-SQL's external file format case 8241 param.set("expression", self._parse_field()) 8242 else: 8243 param.set("expression", self._parse_unquoted_field()) 8244 8245 options.append(param) 8246 self._match(sep) 8247 8248 return options 8249 8250 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8251 expr = self.expression(exp.Credentials) 8252 8253 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8254 expr.set("storage", self._parse_field()) 8255 if self._match_text_seq("CREDENTIALS"): 8256 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8257 creds = ( 8258 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8259 ) 8260 expr.set("credentials", creds) 8261 if self._match_text_seq("ENCRYPTION"): 8262 expr.set("encryption", self._parse_wrapped_options()) 8263 if self._match_text_seq("IAM_ROLE"): 8264 expr.set("iam_role", self._parse_field()) 8265 if self._match_text_seq("REGION"): 8266 expr.set("region", self._parse_field()) 8267 8268 return expr 8269 8270 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8271 return self._parse_field() 8272 8273 def _parse_copy(self) -> exp.Copy | exp.Command: 8274 start = self._prev 8275 8276 self._match(TokenType.INTO) 8277 8278 this = ( 8279 self._parse_select(nested=True, parse_subquery_alias=False) 8280 if self._match(TokenType.L_PAREN, advance=False) 8281 else self._parse_table(schema=True) 
8282 ) 8283 8284 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8285 8286 files = self._parse_csv(self._parse_file_location) 8287 credentials = self._parse_credentials() 8288 8289 self._match_text_seq("WITH") 8290 8291 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8292 8293 # Fallback case 8294 if self._curr: 8295 return self._parse_as_command(start) 8296 8297 return self.expression( 8298 exp.Copy, 8299 this=this, 8300 kind=kind, 8301 credentials=credentials, 8302 files=files, 8303 params=params, 8304 ) 8305 8306 def _parse_normalize(self) -> exp.Normalize: 8307 return self.expression( 8308 exp.Normalize, 8309 this=self._parse_bitwise(), 8310 form=self._match(TokenType.COMMA) and self._parse_var(), 8311 ) 8312 8313 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8314 args = self._parse_csv(lambda: self._parse_lambda()) 8315 8316 this = seq_get(args, 0) 8317 decimals = seq_get(args, 1) 8318 8319 return expr_type( 8320 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8321 ) 8322 8323 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8324 star_token = self._prev 8325 8326 if self._match_text_seq("COLUMNS", "(", advance=False): 8327 this = self._parse_function() 8328 if isinstance(this, exp.Columns): 8329 this.set("unpack", True) 8330 return this 8331 8332 return self.expression( 8333 exp.Star, 8334 **{ # type: ignore 8335 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8336 "replace": self._parse_star_op("REPLACE"), 8337 "rename": self._parse_star_op("RENAME"), 8338 }, 8339 ).update_positions(star_token) 8340 8341 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8342 privilege_parts = [] 8343 8344 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8345 # (end of privilege list) or L_PAREN (start of column list) are met 8346 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8347 privilege_parts.append(self._curr.text.upper()) 8348 self._advance() 8349 8350 this = exp.var(" ".join(privilege_parts)) 8351 expressions = ( 8352 self._parse_wrapped_csv(self._parse_column) 8353 if self._match(TokenType.L_PAREN, advance=False) 8354 else None 8355 ) 8356 8357 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8358 8359 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8360 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8361 principal = self._parse_id_var() 8362 8363 if not principal: 8364 return None 8365 8366 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8367 8368 def _parse_grant(self) -> exp.Grant | exp.Command: 8369 start = self._prev 8370 8371 privileges = self._parse_csv(self._parse_grant_privilege) 8372 8373 self._match(TokenType.ON) 8374 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8375 8376 # Attempt to parse the securable e.g. 
MySQL allows names 8377 # such as "foo.*", "*.*" which are not easily parseable yet 8378 securable = self._try_parse(self._parse_table_parts) 8379 8380 if not securable or not self._match_text_seq("TO"): 8381 return self._parse_as_command(start) 8382 8383 principals = self._parse_csv(self._parse_grant_principal) 8384 8385 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8386 8387 if self._curr: 8388 return self._parse_as_command(start) 8389 8390 return self.expression( 8391 exp.Grant, 8392 privileges=privileges, 8393 kind=kind, 8394 securable=securable, 8395 principals=principals, 8396 grant_option=grant_option, 8397 ) 8398 8399 def _parse_overlay(self) -> exp.Overlay: 8400 return self.expression( 8401 exp.Overlay, 8402 **{ # type: ignore 8403 "this": self._parse_bitwise(), 8404 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8405 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8406 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8407 }, 8408 ) 8409 8410 def _parse_format_name(self) -> exp.Property: 8411 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8412 # for FILE_FORMAT = <format_name> 8413 return self.expression( 8414 exp.Property, 8415 this=exp.var("FORMAT_NAME"), 8416 value=self._parse_string() or self._parse_table_parts(), 8417 ) 8418 8419 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8420 args: t.List[exp.Expression] = [] 8421 8422 if self._match(TokenType.DISTINCT): 8423 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8424 self._match(TokenType.COMMA) 8425 8426 args.extend(self._parse_csv(self._parse_assignment)) 8427 8428 return self.expression( 8429 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8430 ) 8431 8432 def _identifier_expression( 8433 self, token: t.Optional[Token] = None, **kwargs: t.Any 8434 ) -> exp.Identifier: 8435 token = token or self._prev 8436 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8437 expression.update_positions(token) 8438 return expression 8439 8440 def _build_pipe_cte( 8441 self, 8442 query: exp.Query, 8443 expressions: t.List[exp.Expression], 8444 alias_cte: t.Optional[exp.TableAlias] = None, 8445 ) -> exp.Select: 8446 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8447 if alias_cte: 8448 new_cte = alias_cte 8449 else: 8450 self._pipe_cte_counter += 1 8451 new_cte = f"__tmp{self._pipe_cte_counter}" 8452 8453 with_ = query.args.get("with") 8454 ctes = with_.pop() if with_ else None 8455 8456 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8457 if ctes: 8458 new_select.set("with", ctes) 8459 8460 return new_select.with_(new_cte, as_=query, copy=False) 8461 8462 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8463 select = self._parse_select(consume_pipe=False) 8464 if not select: 8465 return query 8466 8467 return self._build_pipe_cte( 8468 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8469 ) 8470 8471 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8472 limit = self._parse_limit() 8473 offset = self._parse_offset() 8474 if limit: 8475 curr_limit = query.args.get("limit", limit) 8476 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8477 query.limit(limit, copy=False) 8478 if offset: 8479 curr_offset = query.args.get("offset") 8480 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
8481 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8482 8483 return query 8484 8485 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8486 this = self._parse_assignment() 8487 if self._match_text_seq("GROUP", "AND", advance=False): 8488 return this 8489 8490 this = self._parse_alias(this) 8491 8492 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8493 return self._parse_ordered(lambda: this) 8494 8495 return this 8496 8497 def _parse_pipe_syntax_aggregate_group_order_by( 8498 self, query: exp.Select, group_by_exists: bool = True 8499 ) -> exp.Select: 8500 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8501 aggregates_or_groups, orders = [], [] 8502 for element in expr: 8503 if isinstance(element, exp.Ordered): 8504 this = element.this 8505 if isinstance(this, exp.Alias): 8506 element.set("this", this.args["alias"]) 8507 orders.append(element) 8508 else: 8509 this = element 8510 aggregates_or_groups.append(this) 8511 8512 if group_by_exists: 8513 query.select(*aggregates_or_groups, copy=False).group_by( 8514 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8515 copy=False, 8516 ) 8517 else: 8518 query.select(*aggregates_or_groups, append=False, copy=False) 8519 8520 if orders: 8521 return query.order_by(*orders, append=False, copy=False) 8522 8523 return query 8524 8525 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8526 self._match_text_seq("AGGREGATE") 8527 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8528 8529 if self._match(TokenType.GROUP_BY) or ( 8530 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8531 ): 8532 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8533 8534 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8535 8536 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8537 first_setop = self.parse_set_operation(this=query) 8538 if not first_setop: 8539 return None 8540 8541 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8542 expr = self._parse_paren() 8543 return expr.assert_is(exp.Subquery).unnest() if expr else None 8544 8545 first_setop.this.pop() 8546 8547 setops = [ 8548 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8549 *self._parse_csv(_parse_and_unwrap_query), 8550 ] 8551 8552 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8553 with_ = query.args.get("with") 8554 ctes = with_.pop() if with_ else None 8555 8556 if isinstance(first_setop, exp.Union): 8557 query = query.union(*setops, copy=False, **first_setop.args) 8558 elif isinstance(first_setop, exp.Except): 8559 query = query.except_(*setops, copy=False, **first_setop.args) 8560 else: 8561 query = query.intersect(*setops, copy=False, **first_setop.args) 8562 8563 query.set("with", ctes) 8564 8565 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8566 8567 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8568 join = self._parse_join() 8569 if not join: 8570 return None 8571 8572 if isinstance(query, exp.Select): 8573 return query.join(join, copy=False) 8574 8575 return query 8576 8577 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8578 pivots = self._parse_pivots() 8579 if not pivots: 8580 return query 8581 8582 from_ = query.args.get("from") 8583 if from_: 8584 from_.this.set("pivots", pivots) 8585 8586 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8587 8588 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8589 self._match_text_seq("EXTEND") 8590 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8591 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8592 8593 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8594 sample = self._parse_table_sample() 8595 8596 with_ = query.args.get("with") 8597 if with_: 8598 with_.expressions[-1].this.set("sample", sample) 8599 else: 8600 query.set("sample", sample) 8601 8602 return query 8603 8604 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8605 if isinstance(query, exp.Subquery): 8606 query = exp.select("*").from_(query, copy=False) 8607 8608 if not query.args.get("from"): 8609 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8610 8611 while self._match(TokenType.PIPE_GT): 8612 start = self._curr 8613 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8614 if not parser: 8615 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8616 # keywords, making it tricky to disambiguate them without lookahead. The approach 8617 # here is to try and parse a set operation and if that fails, then try to parse a 8618 # join operator. If that fails as well, then the operator is not supported. 8619 parsed_query = self._parse_pipe_syntax_set_operator(query) 8620 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8621 if not parsed_query: 8622 self._retreat(start) 8623 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8624 break 8625 query = parsed_query 8626 else: 8627 query = parser(self, query) 8628 8629 return query 8630 8631 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8632 vars = self._parse_csv(self._parse_id_var) 8633 if not vars: 8634 return None 8635 8636 return self.expression( 8637 exp.DeclareItem, 8638 this=vars, 8639 kind=self._parse_types(), 8640 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8641 ) 8642 8643 def _parse_declare(self) -> exp.Declare | exp.Command: 8644 start = self._prev 8645 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8646 8647 if not expressions or self._curr: 8648 return self._parse_as_command(start) 8649 8650 return self.expression(exp.Declare, expressions=expressions)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
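A minimal sketch of constructing a Parser by hand; in practice most callers go through sqlglot.parse or sqlglot.parse_one, which pair the dialect's tokenizer and parser automatically:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Collect errors and raise them together instead of failing on the first one.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    (tree,) = parser.parse(tokens, sql=sql)
    assert isinstance(tree, exp.Select)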
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
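For example (a sketch using the default dialect's tokenizer), a script with two statements yields two trees, in source order:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    print(len(trees))  # 2, one tree per statement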
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
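A sketch of coercing a token stream into a specific node type; exp.Select is registered in EXPRESSION_PARSERS, so this succeeds, while a non-SELECT statement would raise the combined ParseError built at the end of the method:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT x FROM y"
    (select,) = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)
    assert isinstance(select, exp.Select)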
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
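With ErrorLevel.RAISE, raise_error only records problems and check_errors raises them in one combined ParseError once the statement has been consumed. A sketch, using an unbalanced parenthesis to trigger an error:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"  # missing closing parenthesis
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        print(len(e.errors))  # all recorded errors, merged by check_errors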
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
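Under the default ErrorLevel.IMMEDIATE the error is raised on the spot; either way the resulting ParseError carries the location and the highlighted slice of the original SQL. A sketch:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        err = e.errors[0]
        print(err["line"], err["col"], err["highlight"])  # location and offending slice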
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
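The method is meant for parser internals (and dialect subclasses), but a sketch on a bare Parser shows the flow: instantiate the node, attach any pending comments, then validate it:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    column = parser.expression(exp.Column, this=exp.to_identifier("a"))
    print(column.sql())  # a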
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
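A sketch of what validation does with a node that is missing a mandatory argument; with the default IMMEDIATE level the error is raised, while ErrorLevel.IGNORE skips the check entirely:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    incomplete = exp.In()  # "this" is mandatory but unset
    try:
        Parser().validate_expression(incomplete)
    except ParseError as e:
        print(e)  # reports the missing required keyword

    Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)  # no check performed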
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
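A sketch of the resulting tree for a plain set operation, going through the public API; UNION ALL sets distinct to False, while a bare UNION falls back to the dialect's SET_OP_DISTINCT_BY_DEFAULT entry:

    import sqlglot
    from sqlglot import exp

    union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    assert isinstance(union, exp.Union)
    print(union.args["distinct"])  # False, because of ALL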